Individual offenses eported in TOP databases and Prossecutors Office’s Records

Show code
script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"
Show code
 $(document).ready(function() {
    $('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
    // onClick function for all plots (img's)
    $('img:not(.zoomImg)').click(function() {
      $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
      $('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
    });
    // onClick function for zoomImg
    $('img.zoomImg').click(function() {
      $('.zoomDiv').css({opacity: '0', width: '0%'}); 
    });
  });
  
Show code
<script src="hideOutput.js"></script> 
Show code
$(document).ready(function() {    
    $chunks = $('.fold');    
    $chunks.each(function () {      // add button to source code chunks     
    if ( $(this).hasClass('s') ) {       
        $('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
            $('pre.r', this).children('code').attr('class', 'folded');     
            }      // add button to output chunks     
        if ( $(this).hasClass('o') ) {       
            $('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");       
            $('pre:not(.r)', this).children('code:not(r)').addClass('folded');        // add button to plots       
            $(this).find('img').wrap('<pre class=\"plot\"></pre>');       
            $('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");       
            $('pre.plot', this).children('img').addClass('folded');      
            }   
});    // hide all chunks when document is loaded   
    $('.folded').css('display', 'none')    // function to toggle the visibility   
    $('.showopt').click(function() {     
            var label = $(this).html();     
            if (label.indexOf("Show") >= 0) {       
                $(this).html(label.replace("Show", "Hide"));     
            } else {
              $(this).html(label.replace("Hide", "Show"));     
            }     
    $(this).siblings('code, img').slideToggle('fast', 'swing');   
    }); 
}); 


Bring other databases

Show code
### 1.- What is Base_fiscalia_v9, why it has 49,970 IDs & 174,961 rows : paste0("Patients in PO records with unique combination of ID, RUC, end type and date of comission of the offense and offense ", a_tab11_lab_aft_d1 (p= 5,470; RUCs= 5,993; n= 8,291) --> should be only people that commit offenses, not victims)

### 2.- why the difference of 49,970- 49,252????
### 3.- 

Base_fiscalia_v10b_dic_2022<-
  sqldf("SELECT *
  FROM CONS_C1_df_dup_SEP_2020_22_d AS x  
  LEFT JOIN (SELECT *
             FROM Base_fiscalia_v9
             ) AS y
  ON x.hash_key == y.id AND 
  x.dup = 1
  ") #2022-11-25  added dup // #changed the direction to past events, where age at discharge is greater than the age of commission //FEB 2023: WHERE ano_bd_first='2015' OR ano_bd_first='2016' OR ano_bd_first='2017' OR ano_bd_first='2018' OR ano_bd_first='2019'

message(
paste0("First, for each baseline admission of patients in C1 (", format(length(unique(CONS_C1_df_dup_SEP_2020_22_d$hash_key)), big.mark=","), "; n= ",format(nrow(CONS_C1_df_dup_SEP_2020_22_d), big.mark=","),"), we found the PO records in which is found as the offender (p=", format(length(unique(Base_fiscalia_v9$id)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v9), big.mark=","),") (p= ", format(length(unique(Base_fiscalia_v10b_dic_2022$hash_key)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v10b_dic_2022), big.mark=","),"). We did not exclude patients in C1 that were registered in databases before 2015 in this step (Feb 2023)")
)

First, for each baseline admission of patients in C1 (85,048; n= 85,048), we found the PO records in which is found as the offender (p=49,970; n= 174,961) (p= 85,048; n= 209,646). We did not exclude patients in C1 that were registered in databases before 2015 in this step (Feb 2023)

Show code
invisible("49,970 patients")

Base_fiscalia_v11b_dic_2022<-
  Base_fiscalia_v10b_dic_2022 %>% 
  #discrepancies in names of variables
  janitor::clean_names() %>%   #janitor::tabyl(!is.na(dob_imp_num))
  #previously recoded, 
  dplyr::select(-dateofbirth_imp, -country, -victim, -id_victim, -crime_code_c , -reg_c, -end_type_2c, -cod_comunadelito, -cod_lugarocurrencia, -sex_imp, -region_delito, -filter, -id)%>%
  plyr::rename(c("dateofbirth_imp_2"="dateofbirth_imp")) %>% 
  dplyr::ungroup() %>% 
  #selected the first row with distinct information regarding patient ID, case ID, crime code.
  dplyr::group_by(hash_key, dateofbirth_imp, caseid, crime_code_group_rec_prof) %>%
  dplyr::slice(1) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(hash_key,dateofbirth_imp) %>% 
  summarise(n_off_acq= ifelse(sum(crime_code_group_rec_prof=="Acquisitive", na.rm=T)>0, 1,0), n_off_vio= ifelse(sum(crime_code_group_rec_prof=="Violent", na.rm=T)>0, 1,0), n_off_sud= ifelse(sum(crime_code_group_rec_prof== "Substance-related", na.rm=T)>0, 1,0), n_off_oth=  ifelse(sum(crime_code_group_rec_prof== "Other", na.rm=T)>0, 1,0)) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(n_off= rowSums(dplyr::select(., dplyr::starts_with("n_")), na.rm=T)) 

summarise: now 85,048 rows and 6 columns, one group variable remaining (hash_key)

Show code
message(paste0( "Afterwards, we counted the offenses by type of offense (Acquisitive, violent, substance-related and other) and the total by each user before baseline admisson (p= ", format(length(unique(Base_fiscalia_v11b_dic_2022$hash_key)), big.mark=","), "; cases with records in P.O.= ",format(as.numeric(table(is.na(Base_fiscalia_v10b_dic_2022$dateofbirth_imp))[[2]]), big.mark=","),")." ))

Afterwards, we counted the offenses by type of offense (Acquisitive, violent, substance-related and other) and the total by each user before baseline admisson (p= 85,048; cases with records in P.O.= 35,345).

Time for this code chunk to run: 0.5 minutes

Show code
#Registrar hurtos, robos, violencia intrafamiliar y otras acciones cometidas en las últimas 4 semanas
#Violencia Intrafamiliar (Maltrato físico o psicológico)
CONS_TOP_2022<-
  # 107307
  CONS_TOP%>% 
  # dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v9, id), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "id"))%>% 
  dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v11b_dic_2022, hash_key), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "hash_key"))%>% 
  dplyr::mutate(fech_ap_top_num= as.numeric(as.Date(str_sub(as.character(lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T)),1,10))))%>% #No parse failures
  dplyr::select(HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, dateofbirth_imp, Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro) %>% 
  dplyr::filter(!is.na(HASH_KEY)) %>% 
  dplyr::mutate_at(vars("Hurto", "Robo", "Venta.Drogas", "Riña", "Otro"), ~ifelse(.=="S",1,0)) %>% 
  dplyr::mutate(Total.VIF= ifelse(Total.VIF>0,1,0))%>% 
  dplyr::mutate(tot_off_top = base::rowSums(dplyr::select(.,c(Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro)), na.rm = T)) %>% 
  dplyr::mutate(dateofbirth_imp_num= as.numeric(dateofbirth_imp),
                fech_ap_top= lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T),
                edad_a_ap_top_num= lubridate::time_length(lubridate::interval(dateofbirth_imp, fech_ap_top),unit="years"),
                edad_b_ap_top_num= (fech_ap_top_num-dateofbirth_imp_num)/365.25,
                edad_a_ap_top_num_lim= edad_a_ap_top_num-(1/12),
                edad_b_ap_top_num_lim= edad_b_ap_top_num-(1/12)) %>% 
  dplyr::select(-dateofbirth_imp, -dateofbirth_imp_num) %>% 
  dplyr::filter(!is.na(edad_a_ap_top_num)) %>% 
  dplyr::group_by(HASH_KEY, edad_a_ap_top_num) %>%
  dplyr::slice(1) %>% 
  dplyr::ungroup()

ungroup: no grouping variables

Show code
message(paste0("Then, we standardized the varibles in TOP (e.g., dates format) and counted self-reported offenses, and also we standardized data with PO records (dates of birth) to get the age when the patient have answered TOP and the age previous [ (1/12)*365.25 ]=",round((1/12)*365.25,0)," days before TOP application. This resulted in ", format(nrow(CONS_TOP_2022), big.mark=","), " rows (combination of different application dates and patients) of ", format(length(unique(CONS_TOP_2022$HASH_KEY)), big.mark=","), " patients."))

Then, we standardized the varibles in TOP (e.g., dates format) and counted self-reported offenses, and also we standardized data with PO records (dates of birth) to get the age when the patient have answered TOP and the age previous [ (1/12)*365.25 ]=30 days before TOP application. This resulted in 54,159 rows (combination of different application dates and patients) of 20,939 patients.

Time for this code chunk to run: 0.1 minutes

Show code
Base_fiscalia_v9_dic_2022<-
Base_fiscalia_v9 %>% 
  dplyr::group_by(id, caseid, end_type, fec_comision_simple, crime_code_c) %>% 
  dplyr::slice(1) %>% 
  dplyr::ungroup()

message(
paste0("We reduced PO records to those that do not have the same sentence for the same offense committed at the same date in the same process (case ID/RUC) (p= ", format(length(unique(Base_fiscalia_v9_dic_2022$id)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v9_dic_2022), big.mark=","),")")
)

We reduced PO records to those that do not have the same sentence for the same offense committed at the same date in the same process (case ID/RUC) (p= 49,970; n= 166,670)

Show code
#busco en la base de datos de fiscalía, algún delito que haya cometido en el transcurso 
#del último mes anterior a la aplicación del TOP
Base_fiscalia_v13c_dic_2022<-
  sqldf("SELECT *
  FROM CONS_TOP_2022 AS x  
  LEFT JOIN (SELECT *
             FROM Base_fiscalia_v9_dic_2022
             ) AS y
  ON x.HASH_KEY == y.id AND 
  x.edad_a_ap_top_num > y.age_offending_imp AND 
  x.edad_a_ap_top_num_lim < y.age_offending_imp") #2022-11-25  added dup // #changed the direction to past events, where age at discharge is greater than the age of commission

message(
paste0("Of the total of TOP applications (p= ", format(length(unique(CONS_TOP_2022$HASH_KEY)), big.mark=","), "; n= ",format(nrow(CONS_TOP_2022), big.mark=","),"), we looked for offenses comitted in the period of thirty days before (`x.edad_a_ap_top_num_lim`) the application of TOP (p= ", format(length(unique(Base_fiscalia_v13c_dic_2022$HASH_KEY)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v13c_dic_2022), big.mark=","),"). If this database contains more records, is beacause there may be some patients that had more than one record as a result of more than one application of TOP. This will be resolved in the next step")
)

Of the total of TOP applications (p= 20,939; n= 54,159), we looked for offenses comitted in the period of thirty days before (x.edad_a_ap_top_num_lim) the application of TOP (p= 20,939; n= 54,290). If this database contains more records, is beacause there may be some patients that had more than one record as a result of more than one application of TOP. This will be resolved in the next step

Show code
#Luego, contar por cada combinación de HASH y fecha de aplicación, el número de delitos segun familai de delito
#crime_code_group
Base_fiscalia_v13c_dic_2022_2<-
  Base_fiscalia_v13c_dic_2022 %>% 
  dplyr::select(tidyr::any_of(c("HASH_KEY", "fech_ap_top_num", "Fecha.Aplicación.TOP", "Hurto", "Robo", "Venta.Drogas", "Riña",
                "Total.VIF", "Otro", "tot_off_top", "fech_ap_top", "edad_a_ap_top_num", "edad_b_ap_top_num",
                "edad_a_ap_top_num_lim", "edad_b_ap_top_num_lim", "caseid", "end_type", "fec_comision_simple",
                "crime_code_c", "crime_code_group_rec", "crime_code_group_rec_prof", "age_offending_imp")))
#HASH_KEY
#fech_ap_top_num
#Fecha.Aplicación.TOP
#Hurto
#Robo
#Venta.Drogas
#Riña
#Total.VIF
#Otro
#tot_off_top
#fech_ap_top
#edad_a_ap_top_num
#edad_b_ap_top_num
#edad_a_ap_top_num_lim
#edad_b_ap_top_num_lim
#caseid
#end_type
#fec_comision_simple
#crime_code_c
#crime_code_group_rec_prof
#crime_code_group_rec
#age_offending_imp

invisible("# 131 rows added, why?")
message("Patients than had more than one combination of application date and ID. That means, they answered TOP more than one time, and in at least 2 of these times they had previous records: "); Base_fiscalia_v13c_dic_2022_2 %>% 
    dplyr::count(HASH_KEY, fech_ap_top_num) %>% 
    dplyr::filter(n>1) %>% 
    dplyr::summarise(sum=sum(n), n=n(), tot=sum-n)

Patients than had more than one combination of application date and ID. That means, they answered TOP more than one time, and in at least 2 of these times they had previous records:

  sum   n tot
1 245 114 131
Show code
Base_fiscalia_v13c_dic_2022_3<-
Base_fiscalia_v13c_dic_2022_2 %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(vio=dplyr::if_else(crime_code_group_rec_prof=="Violent",1,0,0),
                acq=dplyr::if_else(crime_code_group_rec_prof=="Acquisitive",1,0,0),
                sud=dplyr::if_else(crime_code_group_rec_prof=="Substance-related",1,0,0),
                oth=dplyr::if_else(crime_code_group_rec_prof=="Other",1,0,0)) %>% 
  dplyr::mutate(tot= rowSums(dplyr::select(., c("vio","acq","sud","oth")), na.rm=T)) %>% 
  dplyr::mutate(acq_top= rowSums(dplyr::select(., c("Hurto","Robo")), na.rm=T)) %>%
  dplyr::mutate(sud_top= rowSums(dplyr::select(., c("Venta.Drogas")), na.rm=T)) %>%
  dplyr::mutate(vio_top= rowSums(dplyr::select(., c("Riña", "Total.VIF")), na.rm=T)) %>%
  dplyr::mutate(oth_top= rowSums(dplyr::select(., c("Otro")), na.rm=T)) %>% 
  dplyr::group_by(HASH_KEY, fech_ap_top_num) %>% 
  dplyr::mutate(vio=sum(vio,na.rm=T), acq=sum(acq,na.rm=T), sud=sum(sud,na.rm=T), 
                oth=sum(oth,na.rm=T), tot=sum(tot,na.rm=T), n=n()) %>%
  dplyr::slice(1) %>% 
  dplyr::ungroup() 
#Los que tienen todo pelado en caseid es porque no tienen fecha de comisión en los últimos 30 días
message(
paste0("For each combination of TOP application, count the number of previous offenses in PO records (those with 0s are people that did not committed an offense recorded by PO in the period previous to the application of TOP) (p= ", format(length(unique(Base_fiscalia_v13c_dic_2022_3$HASH_KEY)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v13c_dic_2022_3), big.mark=","),")")
)

For each combination of TOP application, count the number of previous offenses in PO records (those with 0s are people that did not committed an offense recorded by PO in the period previous to the application of TOP) (p= 20,939; n= 54,159)

Time for this code chunk to run: 0.4 minutes


Descriptives

Show code
#http://observatorio.ministeriodesarrollosocial.gob.cl/pobreza-comunal-2020

Comunas_PNDR <- readxl::read_excel("Clasificacion-comunas-PNDR.xlsx")%>% 
  dplyr::mutate(cod= dplyr::case_when(as.character(cod_com)=="16101"~"8401",
                                      as.character(cod_com)=="16102"~"8402",
                                      as.character(cod_com)=="16103"~"8406",
                                      as.character(cod_com)=="16104"~"8407",
                                      as.character(cod_com)=="16105"~"8410",
                                      as.character(cod_com)=="16106"~"8411",
                                      as.character(cod_com)=="16107"~"8413",
                                      as.character(cod_com)=="16108"~"8418",
                                      as.character(cod_com)=="16109"~"8421",
                                      as.character(cod_com)=="16201"~"8414",
                                      as.character(cod_com)=="16202"~"8403",
                                      as.character(cod_com)=="16203"~"8404",
                                      as.character(cod_com)=="16204"~"8408",
                                      as.character(cod_com)=="16205"~"8412",
                                      as.character(cod_com)=="16206"~"8415",
                                      as.character(cod_com)=="16207"~"8420",
                                      as.character(cod_com)=="16301"~"8416",
                                      as.character(cod_com)=="16302"~"8405",
                                      as.character(cod_com)=="16303"~"8409",
                                      as.character(cod_com)=="16304"~"8417",
                                      as.character(cod_com)=="16305"~"8419",
                                      T~ as.character(cod_com)
                                      ))

#http://observatorio.ministeriodesarrollosocial.gob.cl/pobreza-comunal-2011
pobr_mult_2020<-readxl::read_excel("Estimaciones_de_Tasa_de_Pobreza_por_Ingresos_por_Comunas_2020_revisada2022_09.xlsx", skip=1) %>% dplyr::mutate(anio=2020) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2019<-readxl::read_excel("Estimaciones_de_Tasa_de_Pobreza_por_Ingresos_por_Comunas_2020_revisada2022_09.xlsx", skip=1) %>% dplyr::mutate(anio=2019) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2018<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_multidimensional_2017.xlsx", skip=1) %>% dplyr::mutate(anio=2018) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2017<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_multidimensional_2017.xlsx", skip=1) %>% dplyr::mutate(anio=2017) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2016<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_multidimensional_2015.xlsx", skip=1) %>% dplyr::mutate(anio=2016) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2015<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_multidimensional_2015.xlsx", skip=1) %>% dplyr::mutate(anio=2015) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2014<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_2013.xlsx", skip=1)%>% dplyr::mutate(anio=2014) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2013<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_2013.xlsx", skip=1)%>% dplyr::mutate(anio=2013) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2012<-readxl::read_excel("Estimacion_tasa_de_pobreza_comunal_2011_(nueva _metodologia).xlsx", skip=1)%>% dplyr::mutate(anio=2012) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2011<-readxl::read_excel("Estimacion_tasa_de_pobreza_comunal_2011_(nueva _metodologia).xlsx", skip=1)%>% dplyr::mutate(anio=2011) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2010<-readxl::read_excel("Estimacion_tasa_de_pobreza_comunal_2011_(nueva _metodologia).xlsx", skip=1)%>% dplyr::mutate(anio=2010) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2009<-readxl::read_excel("PobrezaporComunas_SAE_20092011.xlsx", skip=3)%>% dplyr::mutate(anio=2009) %>% dplyr::select(anio, everything()) %>% dplyr::select(1:5) %>%  dplyr::rename_at(vars( contains("Incidencia pobreza") ), ~"porc_pobr") %>% dplyr::rename("Código"=2, "Nombre comuna"=3) %>%  dplyr::mutate(Región=rep("")) %>%  dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)

New names: * -> `...1` * -> ...2 * Límite inferior IC -> Límite inferior IC...3 * Incidencia pobreza -> Incidencia pobreza...4 * Límite superior IC -> Límite superior IC...5 * Límite inferior IC -> Límite inferior IC...6 * Incidencia pobreza -> Incidencia pobreza...7 * Límite superior IC -> Límite superior IC...8 * -> `...9` * -> ...10

Show code
pobr_mult_2008<-readxl::read_excel("PobrezaporComunas_SAE_20092011.xlsx", skip=3)%>% dplyr::mutate(anio=2008) %>% dplyr::select(anio, everything()) %>% dplyr::select(1:5) %>%  dplyr::rename_at(vars( contains("Incidencia pobreza") ), ~"porc_pobr") %>% dplyr::rename("Código"=2, "Nombre comuna"=3) %>%  dplyr::mutate(Región=rep("")) %>%  dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)

New names: * -> `...1` * -> ...2 * Límite inferior IC -> Límite inferior IC...3 * Incidencia pobreza -> Incidencia pobreza...4 * Límite superior IC -> Límite superior IC...5 * Límite inferior IC -> Límite inferior IC...6 * Incidencia pobreza -> Incidencia pobreza...7 * Límite superior IC -> Límite superior IC...8 * -> `...9` * -> ...10

Show code
pobr_mult_2007<-readxl::read_excel("PobrezaporComunas_SAE_20092011.xlsx", skip=3)%>% dplyr::mutate(anio=2007) %>% dplyr::select(anio, everything()) %>% dplyr::select(1:5) %>%  dplyr::rename_at(vars( contains("Incidencia pobreza") ), ~"porc_pobr") %>% dplyr::rename("Código"=2, "Nombre comuna"=3) %>%  dplyr::mutate(Región=rep("")) %>%  dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)

New names: * -> `...1` * -> ...2 * Límite inferior IC -> Límite inferior IC...3 * Incidencia pobreza -> Incidencia pobreza...4 * Límite superior IC -> Límite superior IC...5 * Límite inferior IC -> Límite inferior IC...6 * Incidencia pobreza -> Incidencia pobreza...7 * Límite superior IC -> Límite superior IC...8 * -> `...9` * -> ...10

Show code
# replace comuna= "16103" if strpos(strlower(comuna),"8406")>0
# replace comuna= "16104" if strpos(strlower(comuna),"8407")>0
# replace comuna= "16105" if strpos(strlower(comuna),"8410")>0
# replace comuna= "16106" if strpos(strlower(comuna),"8411")>0
# replace comuna= "16107" if strpos(strlower(comuna),"8413")>0
# replace comuna= "16108" if strpos(strlower(comuna),"8418")>0
# replace comuna= "16109" if strpos(strlower(comuna),"8421")>0
# replace comuna= "16201" if strpos(strlower(comuna),"8414")>0
# replace comuna= "16202" if strpos(strlower(comuna),"8403")>0
# replace comuna= "16203" if strpos(strlower(comuna),"8404")>0
# replace comuna= "16204" if strpos(strlower(comuna),"8408")>0
# replace comuna= "16205" if strpos(strlower(comuna),"8412")>0
# replace comuna= "16206" if strpos(strlower(comuna),"8415")>0
# replace comuna= "16207" if strpos(strlower(comuna),"8420")>0
# replace comuna= "16301" if strpos(strlower(comuna),"8416")>0
# replace comuna= "16302" if strpos(strlower(comuna),"8405")>0
# replace comuna= "16303" if strpos(strlower(comuna),"8409")>0
# replace comuna= "16304" if strpos(strlower(comuna),"8417")>0
# replace comuna= "16305" if strpos(strlower(comuna),"8419")>0
pobr_mult_2007_2020<-
rbind.data.frame(pobr_mult_2007, pobr_mult_2008, pobr_mult_2009, pobr_mult_2010, pobr_mult_2011, pobr_mult_2012, pobr_mult_2013, pobr_mult_2014, pobr_mult_2015, pobr_mult_2016, pobr_mult_2017, pobr_mult_2018, pobr_mult_2019, pobr_mult_2020) %>% 
  dplyr::mutate(cod= dplyr::case_when(Código=="16101"~"8401",
                                      Código=="16102"~"8402",
                                      Código=="16103"~"8406",
                                      Código=="16104"~"8407",
                                      Código=="16105"~"8410",
                                      Código=="16106"~"8411",
                                      Código=="16107"~"8413",
                                      Código=="16108"~"8418",
                                      Código=="16109"~"8421",
                                      Código=="16201"~"8414",
                                      Código=="16202"~"8403",
                                      Código=="16203"~"8404",
                                      Código=="16204"~"8408",
                                      Código=="16205"~"8412",
                                      Código=="16206"~"8415",
                                      Código=="16207"~"8420",
                                      Código=="16301"~"8416",
                                      Código=="16302"~"8405",
                                      Código=="16303"~"8409",
                                      Código=="16304"~"8417",
                                      Código=="16305"~"8419",
                                      T~ Código
                                      ))


#2023-02-01:classifications by municipality
class_centros <- readxl::read_excel(paste0(path,"/_ig_borquez/class_centros.xlsx"))%>%
    tidylog::left_join(dplyr::distinct(CONS_C1_df_dup_SEP_2020, id_centro, nombre_centro), by=c("nombre_centro_1" ="nombre_centro"))%>%
    dplyr::group_by(id_centro)%>%
    slice(1)%>% 
    dplyr::ungroup()%>%
    dplyr::mutate(id_centro=dplyr::case_when(grepl("Allende",nombre_centro_1) & is.na(id_centro)~ "475",  T~ as.character(id_centro)))%>%
    dplyr::group_by(id_centro)%>%
    slice(1)

left_join: added one column (id_centro) > rows only in x 30 > rows only in y ( 32) > matched rows 383 > ===== > rows total 413

Show code
    #dplyr::filter(grepl("Allende",nombre_centro_1))


CONS_C1_df_dup_SEP_2020_22_e<-
CONS_C1_df_dup_SEP_2020_22_d %>% 
  dplyr::mutate(comuna_residencia_cod_rec= as.character(readr::parse_number(as.character(comuna_residencia_cod))), anio_ing_tr= lubridate::epiyear(fech_ing)) %>% #glimpse()
  dplyr::left_join(pobr_mult_2007_2020[,c("anio", "cod","porc_pobr")], by= c("comuna_residencia_cod_rec"="cod", "anio_ing_tr"="anio")) %>% 
  dplyr::left_join(Comunas_PNDR[,c("cod", "Clasificación")], by= c("comuna_residencia_cod_rec"="cod"))%>%
  #2023-02-01
  dplyr::left_join(class_centros[,c("id_centro", "nombre_centro_1", "classification")], by= "id_centro")%>%
    purrr::when(nrow(.)>nrow(CONS_C1_df_dup_SEP_2020_22_d) ~ stop("More cases in the new database"), ~.) %>% 
  dplyr::mutate(via_adm_sus_prin_act= factor(dplyr::case_when(via_adm_sus_prin_act=="Injected Intravenously or Intramuscularly"~ "Other",T~as.character(via_adm_sus_prin_act)))) %>% 
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::mutate(con_quien_vive_joel=dplyr::case_when(
    grepl("Solo$",con_quien_vive, ignore.case=T)~"Alone",
    grepl("Con abuelos",con_quien_vive, ignore.case=T)~"Family of origin",
    grepl("Con hermanos",con_quien_vive, ignore.case=T)~"Family of origin",
    grepl("Con la madre \\(sola\\)",con_quien_vive, ignore.case=T)~"Family of origin",
    grepl("Con otro pariente",con_quien_vive, ignore.case=T)~"Others",
    grepl("con hijos y padres o familia",con_quien_vive, ignore.case=T)~"Family of origin",
    grepl("con la pareja y padres o familia de origen",con_quien_vive, ignore.case=T)~"With couple/children",
    grepl("con padres o familia de origen",con_quien_vive, ignore.case=T)~"Family of origin",
    #2021-10-01
    grepl("Únicamente con hijos",con_quien_vive, ignore.case=T)~"With couple/children",
    grepl("Únicamente con pareja",con_quien_vive, ignore.case=T)~"With couple/children",
    #2021-10-01
    grepl("Con la Pareja, Hijos y Padres o Familia de Origen",con_quien_vive, ignore.case=T)~"With couple/children", 
    grepl("Hijos y Padres o Familia de Origen",con_quien_vive, ignore.case=T)~"Family of origin",
    #2021-10-01
    grepl("Únicamente con la pareja e hijos",con_quien_vive, ignore.case=T)~"With couple/children",
    grepl("Con amigos",con_quien_vive, ignore.case=T)~"Others",
    grepl("Con otro NO pariente",con_quien_vive, ignore.case=T)~"Others",
    grepl("*Otros$",con_quien_vive, ignore.case=T)~"Others")) 
  
#TO CHECK IF SOME MUNICIPALLITIES DID NOT JOIN
  #dplyr::filter(is.na(porc_pobr)) %>% dplyr::select(comuna_residencia_cod, anio_ing_tr)

#TO CHECK IF SOME MUNICIPALLITIES DID NOT JOIN
  #dplyr::filter(is.na(porc_pobr)) %>% dplyr::select(comuna_residencia_cod, anio_ing_tr)  

#Private Therapeutic Community, ref
CONS_C1_df_dup_SEP_2020_22_e$clas_r <- relevel(factor(CONS_C1_df_dup_SEP_2020_22_e$Clasificación), ref = "Urbana")
CONS_C1_df_dup_SEP_2020_22_e$via_adm_sus_prin_act <- relevel(factor(CONS_C1_df_dup_SEP_2020_22_e$via_adm_sus_prin_act), ref = "Oral (drunk or eaten)")

Time for this code chunk to run: 0.1 minutes

Show code
cat("Recode to binary measures")
Recode to binary measures
Show code
Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$tot_off_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$tot_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$tot>=1,1,0)

Base_fiscalia_v13c_dic_2022_3$vio_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$vio>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$acq_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$acq>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$sud_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$sud>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$oth_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$oth>=1,1,0)

Base_fiscalia_v13c_dic_2022_3$acq_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$acq_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$sud_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$sud_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$vio_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$vio_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$oth_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$oth_top>=1,1,0)

Time for this code chunk to run: 0 minutes

Join the data of the comparison between TOP reports and PO records, with the baseline information at admission of each patient.

Show code
vars_db<-
c("edad_a_ap_top_num", "end_type", "fech_ap_top_num", "sex", "duplicates_filtered", "id_centro", "macrozona", "nombre_region", "comuna_residencia_cod", "escolaridad_rec", "estado_conyugal_2", "freq_cons_sus_prin", "via_adm_sus_prin_act", "con_quien_vive_joel", "sus_principal_mod", "origen_ingreso_mod", "numero_de_hijos_mod", "tenencia_de_la_vivienda_mod", "sus_ini_mod_mvv", "condicion_ocupacional_corr", "dg_trs_cons_sus_or", "comorbidity_icd_10", "clas_r", "classification")
  
Base_fiscalia_v13c_dic_2022_4 <-
Base_fiscalia_v13c_dic_2022_3 %>% 
  tidylog::left_join(CONS_C1_df_dup_SEP_2020_22_e, by=c("HASH_KEY"="hash_key")) %>% 
  dplyr::select(any_of(c("HASH_KEY",vars_db, paste0(c("acq","sud","vio","oth"),"_bin"), paste0(c("acq","sud","vio","oth"),"_top_bin"), "numero_de_hijos_mod", "tot_off_top","tot_off_top_bin", "tot", "tot_bin", "edad_a_ap_top_num", "fech_ap_top"))) %>% 
   #multinomial
  dplyr::mutate(discrepancies=dplyr::case_when(tot> tot_off_top~ "underreport", tot< tot_off_top~ "overreport", T~ "coincidence")) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(tenencia_de_la_vivienda_mod2=
                  factor(dplyr::case_when(tenencia_de_la_vivienda_mod=="Allegado"~"Stays temporarily with a relative", tenencia_de_la_vivienda_mod=="Arrienda"~"Renting", tenencia_de_la_vivienda_mod=="Cedida"~"Owner/Transferred dwellings/Pays Dividends", tenencia_de_la_vivienda_mod=="Ocupación Irregular"~"Illegal Settlement", tenencia_de_la_vivienda_mod=="Otros"~"Others", tenencia_de_la_vivienda_mod=="Paga dividendo"~"Owner/Transferred dwellings/Pays Dividends", tenencia_de_la_vivienda_mod=="Propia"~"Owner/Transferred dwellings/Pays Dividends", T~NA_character_)))%>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # dplyr::mutate(numero_de_hijos_mod_joel=dplyr::case_when(
  #   grepl("Si$",embarazo, ignore.case=T)~as.integer(numero_de_hijos_mod+1),
  #   T~as.integer(numero_de_hijos_mod)))%>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
    dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =T,trim_ws=F,include_na =F)) %>% #, locale=locale(encoding = "Latin1")
  dplyr::mutate(freq_cons_sus_prin=if_else(freq_cons_sus_prin=='Did not use','Less than 1 day a week',as.character(freq_cons_sus_prin),NA_character_)) 

left_join: added 178 columns (sex, fech_nac, fech_ing, fech_egres_imp, dup, …)

       > rows only in x        0
       > rows only in y  (64,109)
       > matched rows     54,159
       >                 ========
       > rows total       54,159
Show code
attr(Base_fiscalia_v13c_dic_2022_4$escolaridad_rec,"label") <- "Educational Attainment"
attr(Base_fiscalia_v13c_dic_2022_4$freq_cons_sus_prin,"label") <- "Primary Substance at Admission Usage
Frequency"
attr(Base_fiscalia_v13c_dic_2022_4$fech_ap_top_num,"label") <- "Date of discharge"
attr(Base_fiscalia_v13c_dic_2022_4$edad_a_ap_top_num,"label") <- "Age at TOP application"
attr(Base_fiscalia_v13c_dic_2022_4$end_type,"label") <- "Date of discharge"
attr(Base_fiscalia_v13c_dic_2022_4$comorbidity_icd_10,"label") <- "Comorbidity ICD-10 (with amount of different diagnosis)"
attr(Base_fiscalia_v13c_dic_2022_4$con_quien_vive_joel,"label") <- "Living conditions"
attr(Base_fiscalia_v13c_dic_2022_4$acq_bin,"label") <- "Pre-treatment Criminality, Acquisitive (PO) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$sud_bin,"label") <- "Pre-treatment Criminality, Substance-related (PO) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$vio_bin,"label") <- "Pre-treatment Criminality, Violent (PO) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$oth_bin,"label") <- "Pre-treatment Criminality, Other (PO) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$acq_top_bin,"label") <- "Pre-treatment Criminality, Acquisitive (TOP) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$sud_top_bin,"label") <- "Pre-treatment Criminality, Substance-related (TOP) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$vio_top_bin,"label") <- "Pre-treatment Criminality, Violent (TOP) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$oth_top_bin,"label") <- "Pre-treatment Criminality, Other (TOP) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$tot_off_top,"label") <- "Count of previous TOP offenses"
attr(Base_fiscalia_v13c_dic_2022_4$tot_off_top_bin,"label") <- "Count of previous TOP offenses (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$tot,"label") <- "Count of previous offenses"
attr(Base_fiscalia_v13c_dic_2022_4$tot_bin,"label") <- "Count of previous PO offenses (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$discrepancies,"label") <- "Relationship between total records (PO) and reports of offenses (TOP)"
attr(Base_fiscalia_v13c_dic_2022_4$classification,"label") <- "Types of ther. center"
attr(Base_fiscalia_v13c_dic_2022_4$clas_r,"label") <- "Rurality classification"

tbone_desc_merge<-
CreateTableOne(vars= setdiff(c(vars_db, paste0(c("acq","sud","vio","oth"),"_bin"), paste0(c("acq","sud","vio","oth"),"_top_bin"), "tot_off_top","tot_off_top_bin", "tot", "tot_bin", "edad_a_ap_top_num", "fech_ap_top"),c("id_centro", "comuna_residencia_cod")), data=  Base_fiscalia_v13c_dic_2022_4, factorVars = c(setdiff(vars_db,c("edad_a_ap_top_num","fech_ap_top_num","id_centro")),  paste0(c("acq","sud","vio","oth"),"_bin"), paste0(c("acq","sud","vio","oth"),"_top_bin")), smd=T,  addOverall = T, includeNA=T, strata="discrepancies", test=T) #

as.data.frame.TableOne(tbone_desc_merge, smd=T, nonnormal= T)%>% 
  dplyr::mutate(char2=characteristic) %>% 
  tidyr::fill(char2) %>% 
  dplyr::select(char2,everything()) %>% 
  dplyr::mutate(level=ifelse(is.na(level),"[Missing]",level)) %>% 
  dplyr::mutate(char2=dplyr::case_when(characteristic=="NA"~NA_character_,T~as.character(characteristic))) %>% 
  format_cells(1, 1:length(names(.)), "bold") %>%
  dplyr::select(-1) %>% 
  #knitr::kable(size=10, format="markdown",caption= "Summary descriptives, by discrepancy between PO records and TOP reports of offenses", escape=T)
 knitr::kable(size=10, format="html",caption= "Summary descriptives, by discrepancy between PO records and TOP reports of offenses", escape=T) %>% kableExtra::kable_classic()%>% 
    kableExtra::scroll_box(width = "100%", height = "550px") 
Table 1: Summary descriptives, by discrepancy between PO records and TOP reports of offenses
characteristic level Overall coincidence overreport underreport p test SMD
n 54159 41314 12285 560
Age at TOP application (median [IQR]) 35.03 [28.54, 43.46] 35.89 [29.13, 44.48] 32.65 [26.94, 39.74] 34.53 [27.62, 42.82] <0.001 nonnorm 0.222
Date of discharge (%) ACUERDO REPARATORIO 100 ( 0.2) 29 ( 0.1) 19 ( 0.2) 52 ( 9.3) <0.001 8.497
SENTENCIA DEFINITIVA CONDENATORIA 756 ( 1.4) 208 ( 0.5) 170 ( 1.4) 378 ( 67.5)
SOBRESEIMIENTO DEFINITIVO 240 171 ( 0.3) 58 ( 0.1) 42 ( 0.3) 71 ( 12.7)
SUSPENSI¿N CONDICIONAL DEL PROCEDIMIENTO 123 ( 0.2) 35 ( 0.1) 29 ( 0.2) 59 ( 10.5)
[Missing] 53009 (97.9) 40984 ( 99.2) 12025 (97.9) 0 ( 0.0)
Date of discharge (median [IQR]) 17437.00 [17070.00, 17805.00] 17463.00 [17107.00, 17821.00] 17352.00 [16958.00, 17743.00] 17348.50 [17003.75, 17718.00] <0.001 nonnorm 0.148
sex (%) Men 42003 (77.6) 32442 ( 78.5) 9111 (74.2) 450 ( 80.4) <0.001 0.099
Women 12156 (22.4) 8872 ( 21.5) 3174 (25.8) 110 ( 19.6)
Número de Tratamientos por HASH (Total)/Number of Treatments by User (Total) (%) 1 32076 (59.2) 25038 ( 60.6) 6680 (54.4) 358 ( 63.9) <0.001 0.164
2 13495 (24.9) 10076 ( 24.4) 3286 (26.7) 133 ( 23.8)
3 5337 ( 9.9) 3903 ( 9.4) 1385 (11.3) 49 ( 8.8)
4 2094 ( 3.9) 1506 ( 3.6) 575 ( 4.7) 13 ( 2.3)
5 685 ( 1.3) 468 ( 1.1) 213 ( 1.7) 4 ( 0.7)
6 304 ( 0.6) 198 ( 0.5) 104 ( 0.8) 2 ( 0.4)
7 113 ( 0.2) 80 ( 0.2) 32 ( 0.3) 1 ( 0.2)
8 54 ( 0.1) 45 ( 0.1) 9 ( 0.1) 0 ( 0.0)
10 1 ( 0.0) 0 ( 0.0) 1 ( 0.0) 0 ( 0.0)
Macrozona del Centro del Último Registro(b)/Macrozones of the Center of the Last Entry(b) (%) Center 36329 (67.1) 27629 ( 66.9) 8353 (68.0) 347 ( 62.0) <0.001 0.157
North 10288 (19.0) 7509 ( 18.2) 2665 (21.7) 114 ( 20.4)
South 7542 (13.9) 6176 ( 14.9) 1267 (10.3) 99 ( 17.7)
Región del Centro del Último Registro(b)/Chilean Region of the Center of the Last Entry(b) (%) Antofagasta (02) 2437 ( 4.5) 1810 ( 4.4) 604 ( 4.9) 23 ( 4.1) <0.001 0.217
Araucanía (09) 1715 ( 3.2) 1413 ( 3.4) 271 ( 2.2) 31 ( 5.5)
Arica (15) 2134 ( 3.9) 1604 ( 3.9) 507 ( 4.1) 23 ( 4.1)
Atacama (03) 2695 ( 5.0) 1902 ( 4.6) 758 ( 6.2) 35 ( 6.2)
Aysén (11) 1200 ( 2.2) 1053 ( 2.5) 133 ( 1.1) 14 ( 2.5)
Biobío (08) 3512 ( 6.5) 2779 ( 6.7) 700 ( 5.7) 33 ( 5.9)
Coquimbo (04) 1964 ( 3.6) 1414 ( 3.4) 531 ( 4.3) 19 ( 3.4)
Los Lagos (10) 2618 ( 4.8) 2035 ( 4.9) 549 ( 4.5) 34 ( 6.1)
Los Ríos (14) 1097 ( 2.0) 882 ( 2.1) 204 ( 1.7) 11 ( 2.0)
Magallanes (12) 912 ( 1.7) 793 ( 1.9) 110 ( 0.9) 9 ( 1.6)
Maule (07) 3959 ( 7.3) 3193 ( 7.7) 722 ( 5.9) 44 ( 7.9)
Metropolitana (13) 20944 (38.7) 15832 ( 38.3) 4917 (40.0) 195 ( 34.8)
Ñuble (16) 495 ( 0.9) 384 ( 0.9) 107 ( 0.9) 4 ( 0.7)
O’Higgins (06) 3622 ( 6.7) 2725 ( 6.6) 862 ( 7.0) 35 ( 6.2)
Tarapacá (01) 1058 ( 2.0) 779 ( 1.9) 265 ( 2.2) 14 ( 2.5)
Valparaíso (05) 3797 ( 7.0) 2716 ( 6.6) 1045 ( 8.5) 36 ( 6.4)
Educational Attainment (%) 3-Completed primary school or less 15594 (28.8) 11780 ( 28.5) 3659 (29.8) 155 ( 27.7) <0.001 0.072
2-Completed high school or less 29796 (55.0) 22553 ( 54.6) 6925 (56.4) 318 ( 56.8)
1-More than high school 8605 (15.9) 6861 ( 16.6) 1660 (13.5) 84 ( 15.0)
[Missing] 164 ( 0.3) 120 ( 0.3) 41 ( 0.3) 3 ( 0.5)
Estado Conyugal/Marital Status (%) Married/Shared living arrangements 16249 (30.0) 12675 ( 30.7) 3409 (27.7) 165 ( 29.5) <0.001 0.119
Separated/Divorced 5440 (10.0) 4484 ( 10.9) 900 ( 7.3) 56 ( 10.0)
Single 31922 (58.9) 23702 ( 57.4) 7886 (64.2) 334 ( 59.6)
Widower 452 ( 0.8) 372 ( 0.9) 77 ( 0.6) 3 ( 0.5)
[Missing] 96 ( 0.2) 81 ( 0.2) 13 ( 0.1) 2 ( 0.4)
Primary Substance at Admission Usage Frequency (%) 1 day a week or more 3518 ( 6.5) 2868 ( 6.9) 618 ( 5.0) 32 ( 5.7) <0.001 0.143
2 to 3 days a week 14226 (26.3) 11235 ( 27.2) 2835 (23.1) 156 ( 27.9)
4 to 6 days a week 8645 (16.0) 6549 ( 15.9) 2000 (16.3) 96 ( 17.1)
Daily 25152 (46.4) 18520 ( 44.8) 6388 (52.0) 244 ( 43.6)
Less than 1 day a week 2411 ( 4.5) 1984 ( 4.8) 397 ( 3.2) 30 ( 5.4)
[Missing] 207 ( 0.4) 158 ( 0.4) 47 ( 0.4) 2 ( 0.4)
via_adm_sus_prin_act (%) Oral (drunk or eaten) 19080 (35.2) 15655 ( 37.9) 3209 (26.1) 216 ( 38.6) <0.001 0.218
Intranasal (powder aspiration) 9349 (17.3) 7164 ( 17.3) 2104 (17.1) 81 ( 14.5)
Other 75 ( 0.1) 46 ( 0.1) 26 ( 0.2) 3 ( 0.5)
Smoked or Pulmonary Aspiration 25655 (47.4) 18449 ( 44.7) 6946 (56.5) 260 ( 46.4)
Living conditions (%) Alone 5648 (10.4) 4468 ( 10.8) 1109 ( 9.0) 71 ( 12.7) <0.001 0.092
Family of origin 24160 (44.6) 18093 ( 43.8) 5817 (47.4) 250 ( 44.6)
Others 4821 ( 8.9) 3655 ( 8.8) 1119 ( 9.1) 47 ( 8.4)
With couple/children 19530 (36.1) 15098 ( 36.5) 4240 (34.5) 192 ( 34.3)
Sustancia Principal de Consumo (Sólo más frecuentes)(f)/Primary or Main Substance of Consumption at Admission (Only more frequent)(f) (%) Alcohol 18313 (33.8) 15082 ( 36.5) 3021 (24.6) 210 ( 37.5) <0.001 0.254
Cocaine hydrochloride 9081 (16.8) 6970 ( 16.9) 2030 (16.5) 81 ( 14.5)
Marijuana 2982 ( 5.5) 2308 ( 5.6) 628 ( 5.1) 46 ( 8.2)
Other 675 ( 1.2) 507 ( 1.2) 159 ( 1.3) 9 ( 1.6)
Cocaine paste 23108 (42.7) 16447 ( 39.8) 6447 (52.5) 214 ( 38.2)
Origen de Ingreso (Primera Entrada)/Motive of Admission to Treatment (First Entry) (%) Spontaneous 24913 (46.0) 18913 ( 45.8) 5734 (46.7) 266 ( 47.5) <0.001 0.165
Assisted Referral 5500 (10.2) 4000 ( 9.7) 1458 (11.9) 42 ( 7.5)
Other 2724 ( 5.0) 2041 ( 4.9) 664 ( 5.4) 19 ( 3.4)
Justice Sector 6743 (12.5) 5429 ( 13.1) 1224 (10.0) 90 ( 16.1)
Health Sector 14279 (26.4) 10931 ( 26.5) 3205 (26.1) 143 ( 25.5)
Número de Hijos (Valor Max.)/Number of Children (Max. Value) (%) 0 13054 (24.1) 9763 ( 23.6) 3169 (25.8) 122 ( 21.8) <0.001 0.113
1 14892 (27.5) 11240 ( 27.2) 3495 (28.4) 157 ( 28.0)
2 13371 (24.7) 10377 ( 25.1) 2849 (23.2) 145 ( 25.9)
3 7701 (14.2) 5977 ( 14.5) 1634 (13.3) 90 ( 16.1)
4 2956 ( 5.5) 2248 ( 5.4) 684 ( 5.6) 24 ( 4.3)
5 1120 ( 2.1) 888 ( 2.1) 221 ( 1.8) 11 ( 2.0)
6 368 ( 0.7) 290 ( 0.7) 75 ( 0.6) 3 ( 0.5)
7 122 ( 0.2) 89 ( 0.2) 31 ( 0.3) 2 ( 0.4)
8 52 ( 0.1) 41 ( 0.1) 11 ( 0.1) 0 ( 0.0)
9 33 ( 0.1) 27 ( 0.1) 6 ( 0.0) 0 ( 0.0)
10 12 ( 0.0) 7 ( 0.0) 5 ( 0.0) 0 ( 0.0)
[Missing] 478 ( 0.9) 367 ( 0.9) 105 ( 0.9) 6 ( 1.1)
tenencia_de_la_vivienda_mod (%) Illegal Settlement 777 ( 1.4) 545 ( 1.3) 224 ( 1.8) 8 ( 1.4) <0.001 0.070
Others 1326 ( 2.4) 1007 ( 2.4) 308 ( 2.5) 11 ( 2.0)
Owner/Transferred dwellings/Pays Dividends 17865 (33.0) 13789 ( 33.4) 3895 (31.7) 181 ( 32.3)
Renting 9535 (17.6) 7358 ( 17.8) 2080 (16.9) 97 ( 17.3)
Stays temporarily with a relative 22163 (40.9) 16755 ( 40.6) 5166 (42.1) 242 ( 43.2)
[Missing] 2493 ( 4.6) 1860 ( 4.5) 612 ( 5.0) 21 ( 3.8)
Sustancia de Inicio (Valor más vulnerable)/Starting Substance (Most vulnerable value) (%) Alcohol 31493 (58.1) 24964 ( 60.4) 6194 (50.4) 335 ( 59.8) <0.001 0.169
Cocaine hydrochloride 1878 ( 3.5) 1391 ( 3.4) 471 ( 3.8) 16 ( 2.9)
Marijuana 16183 (29.9) 11629 ( 28.1) 4399 (35.8) 155 ( 27.7)
Other 1378 ( 2.5) 990 ( 2.4) 369 ( 3.0) 19 ( 3.4)
Cocaine paste 2991 ( 5.5) 2159 ( 5.2) 802 ( 6.5) 30 ( 5.4)
[Missing] 236 ( 0.4) 181 ( 0.4) 50 ( 0.4) 5 ( 0.9)
Condición Ocupacional Criterio Corregido(f)/Occupational Status Corrected(f) (%) Employed 23952 (44.2) 19117 ( 46.3) 4551 (37.0) 284 ( 50.7) <0.001 0.191
Inactive 4785 ( 8.8) 3604 ( 8.7) 1135 ( 9.2) 46 ( 8.2)
Looking for a job for the first time 130 ( 0.2) 86 ( 0.2) 42 ( 0.3) 2 ( 0.4)
No activity 3948 ( 7.3) 2911 ( 7.0) 1000 ( 8.1) 37 ( 6.6)
Not seeking for work 713 ( 1.3) 537 ( 1.3) 169 ( 1.4) 7 ( 1.2)
Unemployed 20631 (38.1) 15059 ( 36.5) 5388 (43.9) 184 ( 32.9)
Diagnósico de Trastorno por Consumo de Sustancias(d)/Diagnosed of Substance Use Disorder(d) (%) Drug dependence 39726 (73.4) 29730 ( 72.0) 9585 (78.0) 411 ( 73.4) <0.001 0.094
Hazardous consumption 14433 (26.6) 11584 ( 28.0) 2700 (22.0) 149 ( 26.6)
Comorbidity ICD-10 (with amount of different diagnosis) (%) Diagnosis unknown (under study) 6131 (11.3) 4065 ( 9.8) 1984 (16.1) 82 ( 14.6) <0.001 0.169
One 25583 (47.2) 19262 ( 46.6) 6062 (49.3) 259 ( 46.2)
Two or more 1416 ( 2.6) 1065 ( 2.6) 339 ( 2.8) 12 ( 2.1)
Without psychiatric comorbidity 21029 (38.8) 16922 ( 41.0) 3900 (31.7) 207 ( 37.0)
Rurality classification (%) Urbana 40666 (75.1) 30833 ( 74.6) 9430 (76.8) 403 ( 72.0) <0.001 0.104
Mixta 6826 (12.6) 5265 ( 12.7) 1493 (12.2) 68 ( 12.1)
Rural 6666 (12.3) 5216 ( 12.6) 1362 (11.1) 88 ( 15.7)
[Missing] 1 ( 0.0) 0 ( 0.0) 0 ( 0.0) 1 ( 0.2)
Types of ther. center (%) Private Therapeutic Community 18578 (34.3) 13371 ( 32.4) 5033 (41.0) 174 ( 31.1) <0.001 0.167
Public Hospital 9078 (16.8) 7013 ( 17.0) 1962 (16.0) 103 ( 18.4)
Public Mental Health Primary Care 11352 (21.0) 8756 ( 21.2) 2466 (20.1) 130 ( 23.2)
Public Primary Care 8502 (15.7) 6969 ( 16.9) 1448 (11.8) 85 ( 15.2)
Public Therapeutic Community 3651 ( 6.7) 2809 ( 6.8) 802 ( 6.5) 40 ( 7.1)
[Missing] 2998 ( 5.5) 2396 ( 5.8) 574 ( 4.7) 28 ( 5.0)
Pre-treatment Criminality, Acquisitive (PO) (dichotomized) (%) 0 53735 (99.2) 41200 ( 99.7) 12169 (99.1) 366 ( 65.4) <0.001 0.694
1 424 ( 0.8) 114 ( 0.3) 116 ( 0.9) 194 ( 34.6)
Pre-treatment Criminality, Substance-related (PO) (dichotomized) (%) 0 53977 (99.7) 41271 ( 99.9) 12264 (99.8) 442 ( 78.9) <0.001 0.488
1 182 ( 0.3) 43 ( 0.1) 21 ( 0.2) 118 ( 21.1)
Pre-treatment Criminality, Violent (PO) (dichotomized) (%) 0 53870 (99.5) 41222 ( 99.8) 12226 (99.5) 422 ( 75.4) <0.001 0.541
1 289 ( 0.5) 92 ( 0.2) 59 ( 0.5) 138 ( 24.6)
Pre-treatment Criminality, Other (PO) (dichotomized) (%) 0 53836 (99.4) 41222 ( 99.8) 12214 (99.4) 400 ( 71.4) <0.001 0.601
1 323 ( 0.6) 92 ( 0.2) 71 ( 0.6) 160 ( 28.6)
Pre-treatment Criminality, Acquisitive (TOP) (dichotomized) (%) 0 49742 (91.8) 41225 ( 99.8) 7966 (64.8) 551 ( 98.4) <0.001 0.713
1 4417 ( 8.2) 89 ( 0.2) 4319 (35.2) 9 ( 1.6)
Pre-treatment Criminality, Substance-related (TOP) (dichotomized) (%) 0 52585 (97.1) 41305 (100.0) 10720 (87.3) 560 (100.0) <0.001 0.367
1 1574 ( 2.9) 9 ( 0.0) 1565 (12.7) 0 ( 0.0)
Pre-treatment Criminality, Violent (TOP) (dichotomized) (%) 0 44606 (82.4) 41110 ( 99.5) 2967 (24.2) 529 ( 94.5) <0.001 1.601
1 9553 (17.6) 204 ( 0.5) 9318 (75.8) 31 ( 5.5)
Pre-treatment Criminality, Other (TOP) (dichotomized) (%) 0 52519 (97.0) 41279 ( 99.9) 10686 (87.0) 554 ( 98.9) <0.001 0.384
1 1640 ( 3.0) 35 ( 0.1) 1599 (13.0) 6 ( 1.1)
Count of previous TOP offenses (median [IQR]) 0.00 [0.00, 0.00] 0.00 [0.00, 0.00] 1.00 [1.00, 2.00] 0.00 [0.00, 0.00] <0.001 nonnorm 1.604
Count of previous TOP offenses (dichotomized) (median [IQR]) 0.00 [0.00, 0.00] 0.00 [0.00, 0.00] 1.00 [1.00, 1.00] 0.00 [0.00, 0.00] <0.001 nonnorm 7.002
Count of previous offenses (median [IQR]) 0.00 [0.00, 0.00] 0.00 [0.00, 0.00] 0.00 [0.00, 0.00] 1.00 [1.00, 1.00] <0.001 nonnorm 2.303
Count of previous PO offenses (dichotomized) (median [IQR]) 0.00 [0.00, 0.00] 0.00 [0.00, 0.00] 0.00 [0.00, 0.00] 1.00 [1.00, 1.00] <0.001 nonnorm 8.496

Time for this code chunk to run: 0.1 minutes


Comparison

We compared the records of TOP (Hurto,Robo, Venta.Drogas, Riña, Total.VIF, Otro, & tot_off_top) with the records of PO (vio, acq, sud, oth & tot). We grouped TOPs self-reports of offenses related to burglary or robbery into acquisitive (acq_top), the reports of drug-selling into substance related (sud_top) and reports of fights and domestic violence into violent (vio_top) and Other into other (oth_top).

Show code
cat("::: {.panel}", "\n", "[Total]{.panel-name}", "\n")
Total
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0(median(Base_fiscalia_v13c_dic_2022_3$tot,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$tot,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$tot,1-.025, na.rm=T),')')
)

0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
  paste0(median(Base_fiscalia_v13c_dic_2022_3$tot_off_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$tot_off_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$tot_off_top,1-.025, na.rm=T),')')
)

0 (p 2.5= 0, p97.5= 3)

Show code
pol_p<-
polychor(Base_fiscalia_v13c_dic_2022_3$tot_off_top, Base_fiscalia_v13c_dic_2022_3$tot, std.err=T, ML=T)

pol_p2<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$tot_off_top, Base_fiscalia_v13c_dic_2022_3$tot))
[1] “You seem to have a table, I will return just one correlation.”
Show code
#rho  : The (matrix) of tetrachoric/polychoric/biserial correlations
#tau  : The normal equivalent of the cutpoints

#for ordinal variables, the same idea holds...but now we have more than one τ-cut to subdivide the normal distribution into regions associated with the different values of the ordinal measure..

# polychoric is appropriate when the manifest ordinal variables came from categorizing latent normal variables & not otherwise. (In practice, it's more like when you are willing to assume thi

#We start with a relative frequency pattern for our contingency table of ordinal data in the lower-left, and we find the optimal solution for correlation and tau-cuts to get us back to a bivariate normal distribution that would have comparable relative joint-frequencies.

cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(
capture.output(print(pol_p))[c(2,3)]
)

Polychoric Correlation, ML est. = 0.3197 (0.01333)Test of bivariate normality: Chisquare = 69.02, df = 23, p = 1.719e-06

Show code
cat("**Overdispersion**")
Overdispersion
Show code
message(
round(var(Base_fiscalia_v13c_dic_2022_3$tot_off_top)/mean(Base_fiscalia_v13c_dic_2022_3$tot_off_top),2)
)

1.82

Show code
#The variance is not much greater than the mean, which suggests that we will have over-dispersion in the model.
# obtained with a poisson regression

cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$tot_off_top_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$tot_off_top>=4,4,Base_fiscalia_v13c_dic_2022_3$tot_off_top)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
#For more than two classes, these results are calculated comparing each factor level to the remaining levels (i.e. a "one versus all" approach).
# accuracy should be higher than No information rate (naive classifier) in order to be model significant. 
#No information rate tests whether our classifier does better than random assignment
# A hypothesis test is also computed to evaluate whether the overall accuracy rate is greater than the rate of the largest class. P-Value [Acc > NIR]
#Null Error Rate: This is how often you would be wrong if you always predicted the majority class.
# but sometimes the best classifier for a particular application will sometimes have a higher error rate than the null error rate, as in the accuracy paradox
#if the incidence of category A is dominant, being found in 99% of cases, then predicting that every case is category A will have an accuracy of 99%

#Kappa statistics is a measure of how the classification results compare to values assigned by chance. P-value mcnemar, can produce NA values with sparse tables)
pander::pander(caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$tot_off_top_rec), factor(Base_fiscalia_v13c_dic_2022_3$tot))) 
  • positive:

  • table:

      0 1 2 3 4
    0 40984 479 35 2 1
    1 7624 309 32 5 2
    2 2442 146 21 4 0
    3 1140 68 7 0 0
    4 819 34 5 0 0
  • overall:

    Table continues below
    Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
    0.7628 0.0401 0.7592 0.7664 0.9788
    AccuracyPValue McnemarPValue
    1 NA
  • byClass:

    Table continues below
      Sensitivity Specificity Pos Pred Value Neg Pred Value
    Class: 0 0.7732 0.5504 0.9875 0.05001
    Class: 1 0.2983 0.8557 0.03876 0.9843
    Class: 2 0.21 0.9521 0.008037 0.9985
    Class: 3 0 0.9776 0 0.9998
    Class: 4 0 0.9842 0 0.9999
    Table continues below
      Precision Recall F1 Prevalence Detection Rate
    Class: 0 0.9875 0.7732 0.8673 0.9788 0.7567
    Class: 1 0.03876 0.2983 0.06861 0.01913 0.005705
    Class: 2 0.008037 0.21 0.01548 0.001846 0.0003877
    Class: 3 0 0 NA 0.0002031 0
    Class: 4 0 0 NA 5.539e-05 0
      Detection Prevalence Balanced Accuracy
    Class: 0 0.7663 0.6618
    Class: 1 0.1472 0.577
    Class: 2 0.04825 0.581
    Class: 3 0.02243 0.4888
    Class: 4 0.01584 0.4921
  • mode: sens_spec

  • dots:

Show code
cat("**Kappa with ordinal weights**")
Kappa with ordinal weights
Show code
pander::pander(irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$tot_off_top_rec), ordered(Base_fiscalia_v13c_dic_2022_3$tot)),weights = irrCAC::ordinal.weights(0:4)))
coeff.name coeff.val coeff.se coeff.ci coeff.pval
Cohen’s Kappa 0.0349 0.002059 (0.031,0.039) 0e+00
Show code
#really low
#To choose between linear and quadratic weights, ask yourself if the difference between being off by 1 vs. 2 categories is the same as the difference between being off by 2 vs. 3 categories. With linear weights, the penalty is always the same (e.g., 0.33 credit is subtracted for each additional category). However, this is not the case for quadratic weights, where penalties begin mild then grow harsher.
#CONS: Depends on the marginal distribution (prevalence) of the categories

cat("**Plot**")
Plot
Show code
ggstatsplot::ggscatterstats(tot_off_top, tot, data=Base_fiscalia_v13c_dic_2022_3, 
                            type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
                            xlab="Number of TOP offenses", ylab="Number of PO offenses", 
                            title="Relationship between Total counts of offenses from PO and TOP")

stat_bin() using bins = 30. Pick better value with binwidth.

stat_bin() using bins = 30. Pick better value with binwidth.

Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Acquisitive]{.panel-name}", "\n")
Acquisitive
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$acq,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$acq,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$acq,1-.025, na.rm=T),')')
)

Mdn= 0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$acq_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$acq_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$acq_top,1-.025, na.rm=T),')')
)

Mdn= 0 (p 2.5= 0, p97.5= 2)

Show code
pol_p_acq<-
polychor(Base_fiscalia_v13c_dic_2022_3$acq_top, Base_fiscalia_v13c_dic_2022_3$acq, std.err=T, ML=T)

pol_p2_acq<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$acq_top, Base_fiscalia_v13c_dic_2022_3$acq))
[1] “You seem to have a table, I will return just one correlation.”
Show code
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(capture.output(print(pol_p_acq))[c(2,3)])

Polychoric Correlation, ML est. = 0.4372 (0.02007)Test of bivariate normality: Chisquare = 25.55, df = 7, p = 0.0006063

Show code
cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$acq_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$acq>=2,2,Base_fiscalia_v13c_dic_2022_3$acq)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
pander::pander(tidy(caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$acq_top), factor(Base_fiscalia_v13c_dic_2022_3$acq_rec))))
term class estimate conf.low conf.high p.value
accuracy NA 0.916 0.9137 0.9184 1
kappa NA 0.04983 NA NA NA
mcnemar NA NA NA NA 0
sensitivity 0 0.9213 NA NA NA
specificity 0 0.441 NA NA NA
pos_pred_value 0 0.9952 NA NA NA
neg_pred_value 0 0.04234 NA NA NA
precision 0 0.9952 NA NA NA
recall 0 0.9213 NA NA NA
f1 0 0.9568 NA NA NA
prevalence 0 0.9922 NA NA NA
detection_rate 0 0.9141 NA NA NA
detection_prevalence 0 0.9184 NA NA NA
balanced_accuracy 0 0.6812 NA NA NA
sensitivity 1 0.2519 NA NA NA
specificity 1 0.951 NA NA NA
pos_pred_value 1 0.03692 NA NA NA
neg_pred_value 1 0.9942 NA NA NA
precision 1 0.03692 NA NA NA
recall 1 0.2519 NA NA NA
f1 1 0.06439 NA NA NA
prevalence 1 0.007404 NA NA NA
detection_rate 1 0.001865 NA NA NA
detection_prevalence 1 0.05052 NA NA NA
balanced_accuracy 1 0.6014 NA NA NA
sensitivity 2 0.2609 NA NA NA
specificity 2 0.9691 NA NA NA
pos_pred_value 2 0.003569 NA NA NA
neg_pred_value 2 0.9997 NA NA NA
precision 2 0.003569 NA NA NA
recall 2 0.2609 NA NA NA
f1 2 0.007042 NA NA NA
prevalence 2 0.0004247 NA NA NA
detection_rate 2 0.0001108 NA NA NA
detection_prevalence 2 0.03104 NA NA NA
balanced_accuracy 2 0.615 NA NA NA
Show code
cat("**Kappa with ordinal weights**")
Kappa with ordinal weights
Show code
pander::pander(irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$acq_top), ordered(Base_fiscalia_v13c_dic_2022_3$acq_rec)),weights = irrCAC::ordinal.weights(0:2)))
coeff.name coeff.val coeff.se coeff.ci coeff.pval
Cohen’s Kappa 0.04807 0.004008 (0.04,0.056) 0e+00
Show code
cat("**Plot**")
Plot
Show code
ggstatsplot::ggscatterstats(acq_top, acq, data=Base_fiscalia_v13c_dic_2022_3, 
                            type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
                            xlab="Number of TOP offenses", ylab="Number of PO offenses", 
                            title="Relationship between counts of Acquisitive offenses from PO and TOP")

stat_bin() using bins = 30. Pick better value with binwidth. stat_bin() using bins = 30. Pick better value with binwidth.

Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[SUD]{.panel-name}", "\n")
SUD
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$sud,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$sud,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$sud,1-.025, na.rm=T),')')
)

Mdn= 0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$sud_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$sud_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$sud_top,1-.025, na.rm=T),')')
)

Mdn= 0 (p 2.5= 0, p97.5= 1)

Show code
pol_p_sud<-
polychor(Base_fiscalia_v13c_dic_2022_3$sud_top, Base_fiscalia_v13c_dic_2022_3$sud, std.err=T, ML=T)

pol_p2_sud<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$sud_top, Base_fiscalia_v13c_dic_2022_3$sud))
[1] “You seem to have a table, I will return just one correlation.”
Show code
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(capture.output(print(pol_p_sud))[c(2,3)])

Polychoric Correlation, ML est. = 0.1965 (0.04568)Test of bivariate normality: Chisquare = 1.419, df = 2, p = 0.492

Show code
cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$sud_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$sud>=1,1,Base_fiscalia_v13c_dic_2022_3$sud)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
pander::pander(tidy(
caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$sud_top), factor(Base_fiscalia_v13c_dic_2022_3$sud_rec))
))
term class estimate conf.low conf.high p.value
accuracy NA 0.9682 0.9667 0.9697 1
kappa NA 0.01342 NA NA NA
mcnemar NA NA NA NA 2.423e-246
sensitivity 0 0.9712 NA NA NA
specificity 0 0.09341 NA NA NA
pos_pred_value 0 0.9969 NA NA NA
neg_pred_value 0 0.0108 NA NA NA
precision 0 0.9969 NA NA NA
recall 0 0.9712 NA NA NA
f1 0 0.9838 NA NA NA
prevalence 0 0.9966 NA NA NA
detection_rate 0 0.9679 NA NA NA
detection_prevalence 0 0.9709 NA NA NA
balanced_accuracy 0 0.5323 NA NA NA
Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Violent]{.panel-name}", "\n")
Violent
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$vio,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$vio,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$vio,1-.025, na.rm=T),')')
)

Mdn= 0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$vio_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$vio_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$vio_top,1-.025, na.rm=T),')')
)

Mdn= 0 (p 2.5= 0, p97.5= 2)

Show code
pol_p_vio<-
polychor(Base_fiscalia_v13c_dic_2022_3$vio_top, Base_fiscalia_v13c_dic_2022_3$vio, std.err=T, ML=T)

pol_p2_vio<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$vio_top, Base_fiscalia_v13c_dic_2022_3$vio))
[1] “You seem to have a table, I will return just one correlation.”
Show code
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(capture.output(print(pol_p_vio))[c(2,3)])

Polychoric Correlation, ML est. = 0.3468 (0.02255)Test of bivariate normality: Chisquare = 23.84, df = 5, p = 0.0002335

Show code
cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$vio_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$vio>=2,2,Base_fiscalia_v13c_dic_2022_3$vio)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
pander::pander(tidy(
  caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$vio_top), factor(Base_fiscalia_v13c_dic_2022_3$vio_rec))
))
term class estimate conf.low conf.high p.value
accuracy NA 0.8235 0.8203 0.8267 1
kappa NA 0.01975 NA NA NA
mcnemar NA NA NA NA 0
sensitivity 0 0.8257 NA NA NA
specificity 0 0.564 NA NA NA
pos_pred_value 0 0.9972 NA NA NA
neg_pred_value 0 0.01706 NA NA NA
precision 0 0.9972 NA NA NA
recall 0 0.8257 NA NA NA
f1 0 0.9034 NA NA NA
prevalence 0 0.9947 NA NA NA
detection_rate 0 0.8213 NA NA NA
detection_prevalence 0 0.8236 NA NA NA
balanced_accuracy 0 0.6949 NA NA NA
sensitivity 1 0.4326 NA NA NA
specificity 1 0.8553 NA NA NA
pos_pred_value 1 0.01541 NA NA NA
neg_pred_value 1 0.9965 NA NA NA
precision 1 0.01541 NA NA NA
recall 1 0.4326 NA NA NA
f1 1 0.02976 NA NA NA
prevalence 1 0.005207 NA NA NA
detection_rate 1 0.002253 NA NA NA
detection_prevalence 1 0.1462 NA NA NA
balanced_accuracy 1 0.644 NA NA NA
sensitivity 2 0 NA NA NA
specificity 2 0.9698 NA NA NA
pos_pred_value 2 0 NA NA NA
neg_pred_value 2 0.9999 NA NA NA
precision 2 0 NA NA NA
recall 2 0 NA NA NA
f1 2 NA NA NA NA
prevalence 2 0.0001292 NA NA NA
detection_rate 2 0 NA NA NA
detection_prevalence 2 0.03023 NA NA NA
balanced_accuracy 2 0.4849 NA NA NA
Show code
cat("**Kappa with ordinal weights**")
Kappa with ordinal weights
Show code
pander::pander(
irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$vio_top), ordered(Base_fiscalia_v13c_dic_2022_3$vio_rec)),weights = irrCAC::ordinal.weights(0:2))
)
coeff.name coeff.val coeff.se coeff.ci coeff.pval
Cohen’s Kappa 0.01956 0.00187 (0.016,0.023) 0e+00
Show code
cat("**Plot**")
Plot
Show code
ggstatsplot::ggscatterstats(vio_top, vio, data=Base_fiscalia_v13c_dic_2022_3, 
                            type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
                            xlab="Number of TOP offenses", ylab="Number of PO offenses", 
                            title="Relationship between counts of violent offenses from PO and TOP")

stat_bin() using bins = 30. Pick better value with binwidth. stat_bin() using bins = 30. Pick better value with binwidth.

Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Other]{.panel-name}", "\n")
Other
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$oth,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$oth,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$oth,1-.025, na.rm=T),')')
)

Mdn= 0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$oth_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$oth_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$oth_top,1-.025, na.rm=T),')')
)

Mdn= 0 (p 2.5= 0, p97.5= 1)

Show code
pol_p_oth<-
polychor(Base_fiscalia_v13c_dic_2022_3$oth_top, Base_fiscalia_v13c_dic_2022_3$oth, std.err=T, ML=T)

pol_p2_oth<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$oth_top, Base_fiscalia_v13c_dic_2022_3$oth))
[1] “You seem to have a table, I will return just one correlation.”
Show code
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(capture.output(print(pol_p_oth))[c(2,3)])

Polychoric Correlation, ML est. = 0.187 (0.0373)Test of bivariate normality: Chisquare = 1.082, df = 2, p = 0.582

Show code
cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$oth_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$oth>=1,1,Base_fiscalia_v13c_dic_2022_3$oth)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
pander::pander(tidy(
caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$oth_top), factor(Base_fiscalia_v13c_dic_2022_3$oth_rec))
))
term class estimate conf.low conf.high p.value
accuracy NA 0.9648 0.9632 0.9663 1
kappa NA 0.01875 NA NA NA
mcnemar NA NA NA NA 1.654e-199
sensitivity 0 0.9701 NA NA NA
specificity 0 0.08669 NA NA NA
pos_pred_value 0 0.9944 NA NA NA
neg_pred_value 0 0.01707 NA NA NA
precision 0 0.9944 NA NA NA
recall 0 0.9701 NA NA NA
f1 0 0.9821 NA NA NA
prevalence 0 0.994 NA NA NA
detection_rate 0 0.9643 NA NA NA
detection_prevalence 0 0.9697 NA NA NA
balanced_accuracy 0 0.5284 NA NA NA
Show code
cat(":::", "\n")

Time for this code chunk to run: 0.5 minutes

Show code
#
cat("Manually construct measure tables")
#http://rstudio-pubs-static.s3.amazonaws.com/370944_96c386c03ac54ef3bec4535d49e92890.html

confusion_table = table(Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin, Base_fiscalia_v13c_dic_2022_3$tot_bin)
confusion_table[1,1] = 'TN'
confusion_table[1,2] = 'FN'
confusion_table[2,1] = 'FP'
confusion_table[2,2] = 'TP'

get_accuracy <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  accuracy = round((TP + TN) / sum(TP,FP,TN,FN), 2)
  return(accuracy)
}
get_classification_error_rate <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  classification_error_rate = round((FP + FN) / sum(TP,FP,TN,FN),2)
  return(classification_error_rate)
}
get_precision <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  precision = round(TP / (TP + FP), 2)
  return(precision)
}
get_sensitivity <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  sensitivity = round(TP / (TP + FN), 2)
  return(sensitivity)
}
get_specificity <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  specificity = round(TN / (TN + FP), 2)
  return(specificity)
}
get_f1_score <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  
  precision = round(TP / (TP + FP), 2)
  sensitivity = round(TP / (TP + FN), 2)
  f1_score = round((2 * precision * sensitivity) / (precision + sensitivity), 2)
  return(f1_score)
}

caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin), factor(Base_fiscalia_v13c_dic_2022_3$tot_bin))

score = data.frame(accuracy=get_accuracy(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      classification_error_rate=get_classification_error_rate(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      precision=get_precision(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      sensitivity=get_sensitivity(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      specificity=get_specificity(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      f1_score=get_f1_score(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'))

#We can also combine precision and recall into an F1 score. This is the harmonic mean of precision and recall.
knitr::kable(score, caption= "Table 1. Properties")# %>% kableExtra::kable_classic()

Time for this code chunk to run: 0 minutes


ROC

For ROC curves, we dichotomized into committing a crime or not against reporting or not (*_bin). Posteriorly, we calculated confidence intervals using bootstrap with 500 replications.

Show code
#Acquisitive  SUD   Violent Other
suppressMessages(suppressWarnings(library(ROCit)))
## Warning: package 'ROCit' was built under R version 3.5.2
message("Error in convertclass(class, reference = negref) :    class must have exactly two unique values)")

Error in convertclass(class, reference = negref) : class must have exactly two unique values)

Show code
cat("::::: {.panelset}", "\n")
Show code
cat("::: {.panel}", "\n", "[Total]{.panel-name}", "\n")
Total
Show code
ROCit_obj <- rocit(score=Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin,class=Base_fiscalia_v13c_dic_2022_3$tot_bin)
plot(ROCit_obj)
Show code
#Del YOuden, el óptimo es...
pander::pander(ciROC(ROCit_obj,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.2268 and 1
  • TPR: 0, 0.5504 and 1
  • LowerTPR: 0, 0.5214 and 1
  • UpperTPR: 0, 0.5794 and 1
Show code
#FPR: 1-especificidad
#TPR: Sensibilidad

cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Acquisitive]{.panel-name}", "\n")
Acquisitive
Show code
ROCit_acq <- rocit(score=Base_fiscalia_v13c_dic_2022_3$acq_top_bin,class=Base_fiscalia_v13c_dic_2022_3$acq_bin)
plot(ROCit_acq)
Show code
pander::pander(ciROC(ROCit_acq,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.07872 and 1
  • TPR: 0, 0.441 and 1
  • LowerTPR: 0, 0.3937 and 1
  • UpperTPR: 0, 0.4883 and 1
Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[SUD]{.panel-name}", "\n")
SUD
Show code
ROCit_sud <- rocit(score=Base_fiscalia_v13c_dic_2022_3$sud_top_bin,class=Base_fiscalia_v13c_dic_2022_3$sud_bin)
plot(ROCit_sud)
Show code
pander::pander(ciROC(ROCit_sud,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.02885 and 1
  • TPR: 0, 0.09341 and 1
  • LowerTPR: 0, 0.05106 and 1
  • UpperTPR: 0, 0.1357 and 1
Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Violent]{.panel-name}", "\n")
Violent
Show code
ROCit_vio <- rocit(score=Base_fiscalia_v13c_dic_2022_3$vio_top_bin,class=Base_fiscalia_v13c_dic_2022_3$vio_bin)
plot(ROCit_vio)
Show code
pander::pander(ciROC(ROCit_vio,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.1743 and 1
  • TPR: 0, 0.564 and 1
  • LowerTPR: 0, 0.5067 and 1
  • UpperTPR: 0, 0.6213 and 1
Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Other]{.panel-name}", "\n")
Other
Show code
ROCit_oth <- rocit(score=Base_fiscalia_v13c_dic_2022_3$oth_top_bin,class=Base_fiscalia_v13c_dic_2022_3$oth_bin)
plot(ROCit_oth)
Show code
pander::pander(ciROC(ROCit_oth,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.02994 and 1
  • TPR: 0, 0.08669 and 1
  • LowerTPR: 0, 0.05592 and 1
  • UpperTPR: 0, 0.1175 and 1
Show code
cat(":::", "\n")
Show code
cat(":::::", "\n")

Time for this code chunk to run: 0.4 minutes


Analysis

Show code
mlogit.display2 <- function(multinom.model, decimal=2, alpha=.05) {
  s <- summary(multinom.model)
  z <- s$coefficients/s$standard.errors
  pnorm.z <- pnorm(z)
  pgroup <- cut(pnorm.z, c(0,0.0005,0.005,0.025,0.975, 0.995, 0.9995,1))
  stars <-c("***","**","*","","*","**","***")
  #x <-paste(round(s$coefficients,decimal),"/",round(s$standard.errors,decimal+1),stars[pgroup], sep="")
   x <-paste(round(s$coefficients,decimal),"", "", sep="")
  dim(x) <- dim(z)
  colnames(x) <- colnames(s$coefficients)
  rownames(x) <- rownames(s$coefficients)
  
  x1 <- t( x)
  x2 <- t(exp(s$coefficients))
  x2 <- round(x2,decimal)
  x2.1 <- t(exp(s$coefficients-qnorm(1-alpha/2)*s$standard.errors))
  x2.1 <- round(x2.1,decimal)
  x2.2 <- t(exp(s$coefficients+qnorm(1-alpha/2)*s$standard.errors))
  x2.2 <- round(x2.2,decimal)
  x2 <- paste(x2,"(", x2.1, ",", x2.2, ")",  sep="")
  dim(x2) <- dim(x1)
  x2[1,] <- "-"
  x3 <- cbind(x1[,1],x2[,1])
  for(i in 2: (length(s$lab)-1)){x3 <- cbind(x3, cbind(x1[,i],x2[,i]))}
  x4 <- rbind(c("Coeff./SE",paste("RRR(",100-100*alpha,"%CI)   ",sep=""),"Coeff./SE",paste("RRR(",100-100*alpha,"%CI)   ",sep="")),x3)
  colnames.x4 <- c(s$lab[2],"")
  for(i in 3:(length(s$lab))){colnames.x4 <- c(colnames.x4,c(s$lab[i],""))}
  colnames(x4) <- colnames.x4
  rownames(x4) <- c("",s$coefnames)
  cat("\n")
  cat(paste("Outcome =",as.character(s$term)[2],"; ","Referent group = ",s$lab[1],sep=""), "\n")
  

  assign(paste0("output_", deparse(substitute(multinom.model))),data.table::data.table(x4, keep.rownames=T), envir = .GlobalEnv)
    
  print.noquote(x4)
  cat("\n")
  cat("Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ", "\n")
  cat("\n")
  cat(paste("Residual Deviance:", round(s$deviance,decimal), "\n"))
  cat(paste("AIC =", round(s$AIC,digits=decimal), "\n"))
  cat("\n")
}


library(rpart)
library(rpart.plot)
library(partykit)
#Random Forest
library(randomForest)
library(epiDisplay)

Error in library(epiDisplay): there is no package called ‘epiDisplay’

Show code
form<-
as.formula(paste("discrepancies~ ", paste(setdiff(c(vars_db, "numero_de_hijos_mod", "tot_off_top","tot_off_top_bin", "tot", "tot_bin", "edad_a_ap_top_num", "fech_ap_top"), c("id_centro", "comuna_residencia_cod",  "tot_off_top", "tot", "tot_off_top_bin", "tot_bin", "end_type", "fech_ap_top", "nombre_region")), collapse="+ "))) #paste0(c("acq","sud","vio","oth"),"_bin"),   paste0(c("acq","sud","vio","oth"),"_top_bin"), 

form_multilev<-
as.formula(paste("discrepancies~ ", paste(setdiff(c(vars_db, "numero_de_hijos_mod", "tot_off_top","tot_off_top_bin", "tot", "tot_bin", "edad_a_ap_top_num", "fech_ap_top"), c("id_centro", "comuna_residencia_cod", "tot_off_top", "tot",  "tot_off_top_bin", "tot_bin", "end_type", "fech_ap_top", "nombre_region")), collapse="+ "), "| HASH_KEY"))

set.seed(2125)
#Increase the MaxNWts parameter by passing it directly
#Error in nnet.default(X, Y, w, mask = mask, size = 0, skip = TRUE, softmax = TRUE,  :  too many (2502) weights
#In both GLMs and nnet::multinom it is the case that covariates should be scaled to make sure that the fit converges.
invisible("Multinomial logistic")
logit <- nnet::multinom(formula= form, data=Base_fiscalia_v13c_dic_2022_4 %>% dplyr::mutate_if(is.numeric, ~scale(.)),MaxNWts=84581, maxit= 1e3)
# weights:  165 (108 variable)
initial  value 52446.652049 
iter  10 value 27392.213657
iter  20 value 27336.082334
iter  30 value 27262.308639
iter  40 value 27174.675508
iter  50 value 27133.124026
iter  60 value 27104.160304
iter  70 value 27084.782458
iter  80 value 27074.848586
iter  90 value 27063.590666
iter 100 value 27061.309495
iter 110 value 27061.080883
iter 120 value 27060.910452
final  value 27060.909134 
converged
Show code
bs <- function(formula, data, indices) {
  d = data[indices,] # allows boot to select sample
  fit = nnet::multinom(formula, data=d, Hess = TRUE, trace=F, MaxNWts=84581, maxit= 1e3)
  s = summary(fit)
  return( cbind(s$coefficients, s$standard.errors) )
}

# 5 replications
results = list()
set.seed(2125)
results <- boot(
  data=Base_fiscalia_v13c_dic_2022_4 %>% dplyr::mutate_if(is.numeric, ~scale(.)), statistic=bs, R=2e3, parallel='multicore', formula=form
)

#mm <- nnet::multinom(discrepancies ~ .+0, data=Base_fiscalia_v13c_dic_2022_4 %>% dplyr::mutate_if(is.numeric, ~scale(.)), trace=F)
#lmtest::lrtest(mm, "Speciesversicolor")
#Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
#car::Anova(mm)

#https://stackoverflow.com/questions/33170335/bootstrap-multinomial-regression-in-r
# label the estimates

subModelNames <- colnames(results$t0)
varNames <- rownames(results$t0)

estNames <- apply(expand.grid(varNames,subModelNames),1,function(x) paste(x,collapse="_"))
colnames(results$t) <- estNames

library(car)
multinom_cis_norm<-round(exp(confint(results, level=0.95, type="norm")),2)
multinom_cis_perc<-round(exp(confint(results, level=0.95, type="perc")),2)
#multinom_cis_bca<-confint(results, level=0.95, type="bca") #slow
#In confint.boot(results, level = 0.95, type = "bca") :
#  BCa method fails for this problem.  Using 'perc' instead

multinom_cis_logit <- round(exp(confint(logit)),2)
multinom_cis_logit12 <-confint(logit)

# plot the results
#hist(results, legend="separate")

mlogit.display2(logit)

Outcome =discrepancies; Referent group = coincidence 
                                                                      overreport
                                                                      Coeff./SE 
(Intercept)                                                           -1.04     
edad_a_ap_top_num                                                     -0.31     
fech_ap_top_num                                                       -0.19     
sexWomen                                                              0.08      
duplicates_filtered                                                   0.06      
macrozonaNorth                                                        0.07      
macrozonaSouth                                                        -0.13     
escolaridad_rec.L                                                     -0.18     
escolaridad_rec.Q                                                     -0.05     
estado_conyugal_2Separated/Divorced                                   -0.15     
estado_conyugal_2Single                                               -0.03     
estado_conyugal_2Widower                                              -0.05     
freq_cons_sus_prin2 to 3 days a week                                  0.15      
freq_cons_sus_prin4 to 6 days a week                                  0.24      
freq_cons_sus_prinDaily                                               0.29      
freq_cons_sus_prinLess than 1 day a week                              -0.07     
via_adm_sus_prin_actIntranasal (powder aspiration)                    -0.13     
via_adm_sus_prin_actOther                                             0.79      
via_adm_sus_prin_actSmoked or Pulmonary Aspiration                    -0.22     
con_quien_vive_joelFamily of origin                                   0.03      
con_quien_vive_joelOthers                                             0.05      
con_quien_vive_joelWith couple/children                               0.11      
sus_principal_modCocaine hydrochloride                                0.21      
sus_principal_modMarijuana                                            0.09      
sus_principal_modOther                                                0.06      
sus_principal_modCocaine paste                                        0.38      
origen_ingreso_modAssisted Referral                                   0.04      
origen_ingreso_modOther                                               0.08      
origen_ingreso_modJustice Sector                                      -0.24     
origen_ingreso_modHealth Sector                                       -0.08     
numero_de_hijos_mod                                                   0.05      
tenencia_de_la_vivienda_modOthers                                     -0.2      
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends -0.23     
tenencia_de_la_vivienda_modRenting                                    -0.25     
tenencia_de_la_vivienda_modStays temporarily with a relative          -0.27     
sus_ini_mod_mvvCocaine hydrochloride                                  0.08      
sus_ini_mod_mvvMarijuana                                              0.1       
sus_ini_mod_mvvOther                                                  0.17      
sus_ini_mod_mvvCocaine paste                                          -0.07     
condicion_ocupacional_corrInactive                                    0.07      
condicion_ocupacional_corrLooking for a job for the first time        0.36      
condicion_ocupacional_corrNo activity                                 -0.06     
condicion_ocupacional_corrNot seeking for work                        -0.11     
condicion_ocupacional_corrUnemployed                                  0.11      
dg_trs_cons_sus_orHazardous consumption                               -0.1      
comorbidity_icd_10One                                                 -0.25     
comorbidity_icd_10Two or more                                         -0.26     
comorbidity_icd_10Without psychiatric comorbidity                     -0.49     
clas_rMixta                                                           0.09      
clas_rRural                                                           0.06      
classificationPublic Hospital                                         -0.08     
classificationPublic Mental Health Primary Care                       -0.05     
classificationPublic Primary Care                                     -0.26     
classificationPublic Therapeutic Community                            -0.06     
                                                                                     
                                                                      RRR(95%CI)     
(Intercept)                                                           -              
edad_a_ap_top_num                                                     0.74(0.71,0.76)
fech_ap_top_num                                                       0.83(0.81,0.85)
sexWomen                                                              1.09(1.03,1.15)
duplicates_filtered                                                   1.07(1.04,1.09)
macrozonaNorth                                                        1.07(1.01,1.14)
macrozonaSouth                                                        0.88(0.81,0.95)
escolaridad_rec.L                                                     0.84(0.8,0.88) 
escolaridad_rec.Q                                                     0.95(0.92,0.99)
estado_conyugal_2Separated/Divorced                                   0.86(0.79,0.95)
estado_conyugal_2Single                                               0.97(0.92,1.04)
estado_conyugal_2Widower                                              0.95(0.72,1.25)
freq_cons_sus_prin2 to 3 days a week                                  1.16(1.04,1.28)
freq_cons_sus_prin4 to 6 days a week                                  1.27(1.14,1.42)
freq_cons_sus_prinDaily                                               1.33(1.2,1.48) 
freq_cons_sus_prinLess than 1 day a week                              0.93(0.8,1.08) 
via_adm_sus_prin_actIntranasal (powder aspiration)                    0.88(0.59,1.33)
via_adm_sus_prin_actOther                                             2.21(1.27,3.84)
via_adm_sus_prin_actSmoked or Pulmonary Aspiration                    0.8(0.6,1.08)  
con_quien_vive_joelFamily of origin                                   1.03(0.94,1.13)
con_quien_vive_joelOthers                                             1.05(0.94,1.18)
con_quien_vive_joelWith couple/children                               1.11(1.02,1.22)
sus_principal_modCocaine hydrochloride                                1.23(0.81,1.86)
sus_principal_modMarijuana                                            1.09(0.8,1.5)  
sus_principal_modOther                                                1.06(0.85,1.31)
sus_principal_modCocaine paste                                        1.46(1.08,1.97)
origen_ingreso_modAssisted Referral                                   1.04(0.96,1.12)
origen_ingreso_modOther                                               1.08(0.97,1.2) 
origen_ingreso_modJustice Sector                                      0.79(0.73,0.85)
origen_ingreso_modHealth Sector                                       0.92(0.87,0.98)
numero_de_hijos_mod                                                   1.05(1.02,1.08)
tenencia_de_la_vivienda_modOthers                                     0.82(0.66,1.01)
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends 0.8(0.67,0.94) 
tenencia_de_la_vivienda_modRenting                                    0.78(0.66,0.93)
tenencia_de_la_vivienda_modStays temporarily with a relative          0.77(0.65,0.91)
sus_ini_mod_mvvCocaine hydrochloride                                  1.08(0.96,1.22)
sus_ini_mod_mvvMarijuana                                              1.11(1.05,1.17)
sus_ini_mod_mvvOther                                                  1.18(1.03,1.36)
sus_ini_mod_mvvCocaine paste                                          0.93(0.85,1.03)
condicion_ocupacional_corrInactive                                    1.07(0.99,1.17)
condicion_ocupacional_corrLooking for a job for the first time        1.43(0.95,2.14)
condicion_ocupacional_corrNo activity                                 0.94(0.86,1.03)
condicion_ocupacional_corrNot seeking for work                        0.89(0.72,1.1) 
condicion_ocupacional_corrUnemployed                                  1.11(1.06,1.18)
dg_trs_cons_sus_orHazardous consumption                               0.91(0.86,0.96)
comorbidity_icd_10One                                                 0.78(0.73,0.83)
comorbidity_icd_10Two or more                                         0.77(0.67,0.89)
comorbidity_icd_10Without psychiatric comorbidity                     0.61(0.57,0.65)
clas_rMixta                                                           1.09(1.02,1.17)
clas_rRural                                                           1.06(0.98,1.14)
classificationPublic Hospital                                         0.92(0.86,0.99)
classificationPublic Mental Health Primary Care                       0.95(0.89,1.01)
classificationPublic Primary Care                                     0.77(0.71,0.84)
classificationPublic Therapeutic Community                            0.94(0.86,1.03)
                                                                      underreport
                                                                      Coeff./SE  
(Intercept)                                                           -3.82      
edad_a_ap_top_num                                                     -0.2       
fech_ap_top_num                                                       -0.21      
sexWomen                                                              -0.04      
duplicates_filtered                                                   -0.08      
macrozonaNorth                                                        0.09       
macrozonaSouth                                                        0.18       
escolaridad_rec.L                                                     -0.09      
escolaridad_rec.Q                                                     -0.07      
estado_conyugal_2Separated/Divorced                                   -0.02      
estado_conyugal_2Single                                               -0.09      
estado_conyugal_2Widower                                              -0.81      
freq_cons_sus_prin2 to 3 days a week                                  0.23       
freq_cons_sus_prin4 to 6 days a week                                  0.27       
freq_cons_sus_prinDaily                                               0.19       
freq_cons_sus_prinLess than 1 day a week                              0.4        
via_adm_sus_prin_actIntranasal (powder aspiration)                    -8.29      
via_adm_sus_prin_actOther                                             1.93       
via_adm_sus_prin_actSmoked or Pulmonary Aspiration                    0.98       
con_quien_vive_joelFamily of origin                                   -0.18      
con_quien_vive_joelOthers                                             -0.15      
con_quien_vive_joelWith couple/children                               -0.32      
sus_principal_modCocaine hydrochloride                                8.18       
sus_principal_modMarijuana                                            -0.69      
sus_principal_modOther                                                -0.16      
sus_principal_modCocaine paste                                        -1.01      
origen_ingreso_modAssisted Referral                                   -0.27      
origen_ingreso_modOther                                               -0.4       
origen_ingreso_modJustice Sector                                      0.09       
origen_ingreso_modHealth Sector                                       -0.08      
numero_de_hijos_mod                                                   0.1        
tenencia_de_la_vivienda_modOthers                                     -0.08      
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends 0.03       
tenencia_de_la_vivienda_modRenting                                    -0.03      
tenencia_de_la_vivienda_modStays temporarily with a relative          0.11       
sus_ini_mod_mvvCocaine hydrochloride                                  -0.13      
sus_ini_mod_mvvMarijuana                                              -0.06      
sus_ini_mod_mvvOther                                                  0.22       
sus_ini_mod_mvvCocaine paste                                          0.13       
condicion_ocupacional_corrInactive                                    -0.13      
condicion_ocupacional_corrLooking for a job for the first time        -0.28      
condicion_ocupacional_corrNo activity                                 -0.18      
condicion_ocupacional_corrNot seeking for work                        -0.73      
condicion_ocupacional_corrUnemployed                                  -0.27      
dg_trs_cons_sus_orHazardous consumption                               -0.17      
comorbidity_icd_10One                                                 -0.44      
comorbidity_icd_10Two or more                                         -0.54      
comorbidity_icd_10Without psychiatric comorbidity                     -0.56      
clas_rMixta                                                           0.01       
clas_rRural                                                           0.2        
classificationPublic Hospital                                         0.14       
classificationPublic Mental Health Primary Care                       0.2        
classificationPublic Primary Care                                     -0.03      
classificationPublic Therapeutic Community                            0.11       
                                                                                              
                                                                      RRR(95%CI)              
(Intercept)                                                           -                       
edad_a_ap_top_num                                                     0.82(0.73,0.92)         
fech_ap_top_num                                                       0.81(0.74,0.89)         
sexWomen                                                              0.96(0.75,1.24)         
duplicates_filtered                                                   0.92(0.83,1.02)         
macrozonaNorth                                                        1.09(0.84,1.42)         
macrozonaSouth                                                        1.2(0.92,1.57)          
escolaridad_rec.L                                                     0.92(0.74,1.13)         
escolaridad_rec.Q                                                     0.93(0.8,1.08)          
estado_conyugal_2Separated/Divorced                                   0.98(0.69,1.37)         
estado_conyugal_2Single                                               0.91(0.71,1.16)         
estado_conyugal_2Widower                                              0.45(0.11,1.83)         
freq_cons_sus_prin2 to 3 days a week                                  1.26(0.84,1.89)         
freq_cons_sus_prin4 to 6 days a week                                  1.31(0.85,2.01)         
freq_cons_sus_prinDaily                                               1.21(0.8,1.82)          
freq_cons_sus_prinLess than 1 day a week                              1.5(0.88,2.55)          
via_adm_sus_prin_actIntranasal (powder aspiration)                    0(0,0)                  
via_adm_sus_prin_actOther                                             6.87(1.53,30.82)        
via_adm_sus_prin_actSmoked or Pulmonary Aspiration                    2.66(0.52,13.69)        
con_quien_vive_joelFamily of origin                                   0.84(0.6,1.17)          
con_quien_vive_joelOthers                                             0.86(0.56,1.32)         
con_quien_vive_joelWith couple/children                               0.73(0.52,1.02)         
sus_principal_modCocaine hydrochloride                                3551.35(3049.91,4135.23)
sus_principal_modMarijuana                                            0.5(0.09,2.68)          
sus_principal_modOther                                                0.85(0.33,2.2)          
sus_principal_modCocaine paste                                        0.36(0.07,1.89)         
origen_ingreso_modAssisted Referral                                   0.76(0.53,1.09)         
origen_ingreso_modOther                                               0.67(0.39,1.13)         
origen_ingreso_modJustice Sector                                      1.1(0.84,1.43)          
origen_ingreso_modHealth Sector                                       0.92(0.73,1.15)         
numero_de_hijos_mod                                                   1.1(0.99,1.23)          
tenencia_de_la_vivienda_modOthers                                     0.93(0.35,2.42)         
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends 1.03(0.48,2.23)         
tenencia_de_la_vivienda_modRenting                                    0.97(0.44,2.12)         
tenencia_de_la_vivienda_modStays temporarily with a relative          1.12(0.52,2.43)         
sus_ini_mod_mvvCocaine hydrochloride                                  0.88(0.5,1.53)          
sus_ini_mod_mvvMarijuana                                              0.94(0.75,1.18)         
sus_ini_mod_mvvOther                                                  1.25(0.73,2.12)         
sus_ini_mod_mvvCocaine paste                                          1.13(0.75,1.71)         
condicion_ocupacional_corrInactive                                    0.88(0.62,1.25)         
condicion_ocupacional_corrLooking for a job for the first time        0.75(0.1,5.51)          
condicion_ocupacional_corrNo activity                                 0.83(0.57,1.23)         
condicion_ocupacional_corrNot seeking for work                        0.48(0.15,1.52)         
condicion_ocupacional_corrUnemployed                                  0.76(0.61,0.95)         
dg_trs_cons_sus_orHazardous consumption                               0.84(0.67,1.06)         
comorbidity_icd_10One                                                 0.64(0.49,0.84)         
comorbidity_icd_10Two or more                                         0.58(0.31,1.08)         
comorbidity_icd_10Without psychiatric comorbidity                     0.57(0.43,0.76)         
clas_rMixta                                                           1.01(0.76,1.35)         
clas_rRural                                                           1.22(0.92,1.63)         
classificationPublic Hospital                                         1.15(0.86,1.54)         
classificationPublic Mental Health Primary Care                       1.22(0.94,1.59)         
classificationPublic Primary Care                                     0.97(0.71,1.33)         
classificationPublic Therapeutic Community                            1.12(0.77,1.63)         

Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1  

Residual Deviance: 54121.82 
AIC = 54337.82 
Show code
tibble(output_logit) %>% 
      dplyr::slice(-1) %>% 
      tidyr::separate(V2, sep="\\(", into= c("RRR_ov","CI_ov")) %>% 
      tidyr::separate(CI_ov, sep="\\,", into= c("ci_lo_ov","ci_up_ov")) %>% 
      tidyr::separate(V4, sep="\\(", into= c("RRR_und","CI_und")) %>% 
      tidyr::separate(CI_und, sep="\\,", into= c("ci_lo_und","ci_up_und")) %>% 
      dplyr::mutate(ci_up_ov= gsub("\\)","",ci_up_ov)) %>% 
      dplyr::mutate(ci_up_und= gsub("\\)","",ci_up_und)) %>% 
#output_logit %>% 
  knitr::kable("markdown", caption="Outputs from the multinomial model")
Table 2: Outputs from the multinomial model
rn overreport RRR_ov ci_lo_ov ci_up_ov underreport RRR_und ci_lo_und ci_up_und
(Intercept) -1.04 - -3.82 -
edad_a_ap_top_num -0.31 0.74 0.71 0.76 -0.2 0.82 0.73 0.92
fech_ap_top_num -0.19 0.83 0.81 0.85 -0.21 0.81 0.74 0.89
sexWomen 0.08 1.09 1.03 1.15 -0.04 0.96 0.75 1.24
duplicates_filtered 0.06 1.07 1.04 1.09 -0.08 0.92 0.83 1.02
macrozonaNorth 0.07 1.07 1.01 1.14 0.09 1.09 0.84 1.42
macrozonaSouth -0.13 0.88 0.81 0.95 0.18 1.2 0.92 1.57
escolaridad_rec.L -0.18 0.84 0.8 0.88 -0.09 0.92 0.74 1.13
escolaridad_rec.Q -0.05 0.95 0.92 0.99 -0.07 0.93 0.8 1.08
estado_conyugal_2Separated/Divorced -0.15 0.86 0.79 0.95 -0.02 0.98 0.69 1.37
estado_conyugal_2Single -0.03 0.97 0.92 1.04 -0.09 0.91 0.71 1.16
estado_conyugal_2Widower -0.05 0.95 0.72 1.25 -0.81 0.45 0.11 1.83
freq_cons_sus_prin2 to 3 days a week 0.15 1.16 1.04 1.28 0.23 1.26 0.84 1.89
freq_cons_sus_prin4 to 6 days a week 0.24 1.27 1.14 1.42 0.27 1.31 0.85 2.01
freq_cons_sus_prinDaily 0.29 1.33 1.2 1.48 0.19 1.21 0.8 1.82
freq_cons_sus_prinLess than 1 day a week -0.07 0.93 0.8 1.08 0.4 1.5 0.88 2.55
via_adm_sus_prin_actIntranasal (powder aspiration) -0.13 0.88 0.59 1.33 -8.29 0 0 0
via_adm_sus_prin_actOther 0.79 2.21 1.27 3.84 1.93 6.87 1.53 30.82
via_adm_sus_prin_actSmoked or Pulmonary Aspiration -0.22 0.8 0.6 1.08 0.98 2.66 0.52 13.69
con_quien_vive_joelFamily of origin 0.03 1.03 0.94 1.13 -0.18 0.84 0.6 1.17
con_quien_vive_joelOthers 0.05 1.05 0.94 1.18 -0.15 0.86 0.56 1.32
con_quien_vive_joelWith couple/children 0.11 1.11 1.02 1.22 -0.32 0.73 0.52 1.02
sus_principal_modCocaine hydrochloride 0.21 1.23 0.81 1.86 8.18 3551.35 3049.91 4135.23
sus_principal_modMarijuana 0.09 1.09 0.8 1.5 -0.69 0.5 0.09 2.68
sus_principal_modOther 0.06 1.06 0.85 1.31 -0.16 0.85 0.33 2.2
sus_principal_modCocaine paste 0.38 1.46 1.08 1.97 -1.01 0.36 0.07 1.89
origen_ingreso_modAssisted Referral 0.04 1.04 0.96 1.12 -0.27 0.76 0.53 1.09
origen_ingreso_modOther 0.08 1.08 0.97 1.2 -0.4 0.67 0.39 1.13
origen_ingreso_modJustice Sector -0.24 0.79 0.73 0.85 0.09 1.1 0.84 1.43
origen_ingreso_modHealth Sector -0.08 0.92 0.87 0.98 -0.08 0.92 0.73 1.15
numero_de_hijos_mod 0.05 1.05 1.02 1.08 0.1 1.1 0.99 1.23
tenencia_de_la_vivienda_modOthers -0.2 0.82 0.66 1.01 -0.08 0.93 0.35 2.42
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends -0.23 0.8 0.67 0.94 0.03 1.03 0.48 2.23
tenencia_de_la_vivienda_modRenting -0.25 0.78 0.66 0.93 -0.03 0.97 0.44 2.12
tenencia_de_la_vivienda_modStays temporarily with a relative -0.27 0.77 0.65 0.91 0.11 1.12 0.52 2.43
sus_ini_mod_mvvCocaine hydrochloride 0.08 1.08 0.96 1.22 -0.13 0.88 0.5 1.53
sus_ini_mod_mvvMarijuana 0.1 1.11 1.05 1.17 -0.06 0.94 0.75 1.18
sus_ini_mod_mvvOther 0.17 1.18 1.03 1.36 0.22 1.25 0.73 2.12
sus_ini_mod_mvvCocaine paste -0.07 0.93 0.85 1.03 0.13 1.13 0.75 1.71
condicion_ocupacional_corrInactive 0.07 1.07 0.99 1.17 -0.13 0.88 0.62 1.25
condicion_ocupacional_corrLooking for a job for the first time 0.36 1.43 0.95 2.14 -0.28 0.75 0.1 5.51
condicion_ocupacional_corrNo activity -0.06 0.94 0.86 1.03 -0.18 0.83 0.57 1.23
condicion_ocupacional_corrNot seeking for work -0.11 0.89 0.72 1.1 -0.73 0.48 0.15 1.52
condicion_ocupacional_corrUnemployed 0.11 1.11 1.06 1.18 -0.27 0.76 0.61 0.95
dg_trs_cons_sus_orHazardous consumption -0.1 0.91 0.86 0.96 -0.17 0.84 0.67 1.06
comorbidity_icd_10One -0.25 0.78 0.73 0.83 -0.44 0.64 0.49 0.84
comorbidity_icd_10Two or more -0.26 0.77 0.67 0.89 -0.54 0.58 0.31 1.08
comorbidity_icd_10Without psychiatric comorbidity -0.49 0.61 0.57 0.65 -0.56 0.57 0.43 0.76
clas_rMixta 0.09 1.09 1.02 1.17 0.01 1.01 0.76 1.35
clas_rRural 0.06 1.06 0.98 1.14 0.2 1.22 0.92 1.63
classificationPublic Hospital -0.08 0.92 0.86 0.99 0.14 1.15 0.86 1.54
classificationPublic Mental Health Primary Care -0.05 0.95 0.89 1.01 0.2 1.22 0.94 1.59
classificationPublic Primary Care -0.26 0.77 0.71 0.84 -0.03 0.97 0.71 1.33
classificationPublic Therapeutic Community -0.06 0.94 0.86 1.03 0.11 1.12 0.77 1.63
Show code
  # knitr::kable("html", caption="Outputs from the multinomial model") %>% 
  # kableExtra::kable_classic()

Time for this code chunk to run: 0.2 minutes

Show code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#Base_fiscalia_v13c_dic_2022_4$discrepancies <- factor(Base_fiscalia_v13c_dic_2022_4$discrepancies,
#  levels = c("admin", "manage", "custodial"), ordered = FALSE)

# bw_vglm <- VGAM::vglm(formula= form, family = multinomial,
#   data = bwmale2)
# coef(bw_vglm, matrix = TRUE)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#https://rubenfcasal.github.io/aprendizaje_estadistico/cart-con-el-paquete-rpart.html

set.seed(2125)
ind <- sample(x = 2, size = nrow(Base_fiscalia_v13c_dic_2022_4), replace = T, prob = c(0.7, 0.3))
train <- Base_fiscalia_v13c_dic_2022_4[ind == 1, ]
test <- Base_fiscalia_v13c_dic_2022_4[ind == 2, ]

mod1.2 <- rpart(form, data = train)
#mod1 #print(mod1)
#summary(mod1)
rpart.plot(mod1.2)
rpart.rules(mod1.2, style = "tall")

form2<-
as.formula(paste("discrepancies~ ", paste(setdiff(c(vars_db, "numero_de_hijos_mod", "edad_a_ap_top_num", "fech_ap_top"), c("id_centro", "comuna_residencia_cod")), collapse="+ ")))

form2_multilev<-
as.formula(paste("discrepancies~ ", paste(setdiff(c(vars_db, "numero_de_hijos_mod", "edad_a_ap_top_num", "fech_ap_top"), c("id_centro", "comuna_residencia_cod")), collapse="+ "), "| HASH_KEY"))

mod1.3 <- rpart(form2, data = train, cp=0)

head(mod1.3$cptable, 10)

xerror <- mod1.3$cptable[,"xerror"]
imin.xerror <- which.min(xerror)
# Valor óptimo
mod1.3$cptable[imin.xerror, ]
upper.xerror <- xerror[imin.xerror] + mod1.3$cptable[imin.xerror, "xstd"]
icp <- min(which(xerror <= upper.xerror))
cp <- mod1.3$cptable[icp, "CP"]
tree <- prune(mod1.3, cp = cp)
rpart.plot(tree, box.palette = "Blues") 

invisible("can take more than 15 hours, because the model was taking municipallity and center ID!")

#tm_cl <- mlogit::mlogit(formula= form_multilev, data=Base_fiscalia_v13c_dic_2022_4)#, reflevel = "coincidence")
#pivot_longer(Base_fiscalia_v13c_dic_2022_4, id=c("HASH_KEY", "edad_a_ap_top_num"))
#modelsummary(list("Conditional logit"=tm_cl), 
#             fmt=3, estimate="{estimate}{stars}")

Time for this code chunk to run: 0 minutes

Flowchart

Show code
a_tab1_lab_aft_d<- paste0('Original C1 Dataset \n(p= ', formatC(CONS_C1%>% dplyr::distinct(HASH_KEY)%>% nrow(), format='f', big.mark=',', digits=0), ';\np= ', formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0),')')

a_tab2_lab_aft_d<- paste0('&#8226;Remove duplicated entries\\\\\\l&#8226;Overlapping treatments of patients\\\\\\l&#8226;Intermediate treatment events (continuous referrals)    \\\\\\l')

a_tab3_lab_aft_d<- paste0('      C1 Dataset          \n(p= ', formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0), ';\nn= ', formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0),')')


a_tab4_lab_aft_d<- paste0('Original Prosecutors Office\n(p= ',Base_fiscalia_v2%>% dplyr::distinct(rut_enc_saf)%>% nrow() %>% format(big.mark=','), ';\nCauses= ',Base_fiscalia_v2%>% dplyr::distinct(ruc)%>% nrow() %>% format(big.mark=','), ';\nRel.=',Base_fiscalia_v2%>%dplyr::distinct(idrelacion)%>%nrow()%>%format(big.mark=','), ';\nRUC_Vic_Imp=',Base_fiscalia_v2%>%dplyr::mutate(rel=paste0(ruc,"_",idsujeto_victima,"_",idsujeto_imputado,"_","iddelito"))%>%dplyr::distinct(rel)%>%nrow()%>%format(big.mark=','), ';\nn= ',format(nrow(Base_fiscalia_v2),big.mark=","),')')

#crimes committed after study follow-up
a_tab5_lab_aft_d1<-
    paste0("(p= ",format(nrow(unique(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"rut_enc_saf"))),big.mark=","),"; RUCs= ", format(nrow(unique(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"ruc"))), big.mark=","),";n= ", format(nrow(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"rut_enc_saf")), big.mark=","),")")
#erase entries with missing values in fec_comision_simple y termino_relacion_simple
leftovers_Base_fiscalia_v3<-
Base_fiscalia_v3 %>% 
  dplyr::left_join(after_imp_Base_fiscalia_v3_db[,c("rut_enc_saf","imp_birth_date","flowch_age")], by="rut_enc_saf") %>% 
  dplyr::rename("obs"="flowch_age") %>% 
  dplyr::mutate(imp_birth_date=dplyr::case_when(!is.na(imp_birth_date)~imp_birth_date,T~fec_nacimiento_simple))%>% dplyr::mutate(edad_comision_imp=as.numeric(fec_comision_simple-imp_birth_date)/365.25) %>% dplyr::mutate(edad_ter_rel_imp=as.numeric(termino_relacion_simple-imp_birth_date)/365.25) %>%
  #arrange the rut from the first date of comission of a crime, but we are not detecting if he/she is the victim or not
  dplyr::arrange(rut_enc_saf, edad_comision_imp) %>% #566884
  dplyr::filter(dplyr::case_when(!is.na(edad_comision_imp)~T,T~F)) %>% #566644
  dplyr::filter(imp_birth_date=="1900-01-01"|is.na(imp_birth_date))

a_tab5_lab_aft_d12<-
    paste0("(p= ",format(nrow(dplyr::distinct(leftovers_Base_fiscalia_v3,rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(leftovers_Base_fiscalia_v3,ruc)), big.mark=","),";n= ",
           format(nrow(leftovers_Base_fiscalia_v3), big.mark=","),")")

#minor to 14 years old
a_tab5_lab_aft_d13<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14),rut_enc_saf)),big.mark=","),"; RUCs= ", format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14),ruc)), big.mark=","),";n= ", format(nrow(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14)), big.mark=","),")")

#Remove duplicated entries
a_tab5_lab_aft_d2<-
  paste0("(p= ",format(length(unique(eliminated_duplicates$rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(length(unique(eliminated_duplicates$ruc)), big.mark=","),";n= ",
           format(nrow(eliminated_duplicates), big.mark=","),")")
#before 2010
a_tab5_lab_aft_d3<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01"),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01"),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01")), big.mark=","),")")
#remove administrative annulment
a_tab5_lab_aft_d4<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA"),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA"),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA")), big.mark=","),")")
#remove grouped to another case
a_tab5_lab_aft_d5<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO"),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO"),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO")), big.mark=","),")")

a_tab5_lab_aft_d<- paste0('&#8226;Filter crimes committed after study follow-up period',a_tab5_lab_aft_d1,'\\\\\\l&#8226;Remove duplicated entries',a_tab5_lab_aft_d2,'\\\\\\l&#8226;Correct dates (birth, comission of crime, end of judicial proceedings), missing nationality and sex\\\\\\l&#8226;Erase entries with missing values in  comission of crime, end of judicial proceedings',a_tab5_lab_aft_d12,'\\\\\\l&#8226;Erase entries with values in comission of crime when minor to 14 years old after imputation',a_tab5_lab_aft_d13,'\\\\\\l&#8226;Filter crimes committed before study follow-up',a_tab5_lab_aft_d3,'\\\\\\l&#8226;Filter records with cause of end of the proceedings= administrative annulment',a_tab5_lab_aft_d4,'\\\\\\l&#8226;Filter records with cause of end of the proceedings= grouped to another case',a_tab5_lab_aft_d5,'\\\\\\l')

a_tab6_lab_aft_d<- paste0("O.P. Dataset \n(p= ", dplyr::distinct(Base_fiscalia_v8, rut_enc_saf)%>% nrow()%>% formatC(big.mark = ","), ";\nn= ",formatC(nrow(Base_fiscalia_v8),big.mark = ","),")")

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#DISCARD not coded as an offender
a_tab7_lab_aft_d1<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado)),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado)),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado))), big.mark=","),")")

#FILTER IF THE PATIENT RECIEVES FOR THE RELATIONSHIP AMONG THOSE THAT WERE OFFENDERS
# end of proceeding  === SIMILAR TO Base_fiscalia_v9
a_tab7_lab_aft_d2<-
      paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0)), big.mark=","),")")

CONS_TOP_2022_anti<-
  # 107307
  CONS_TOP%>% 
  dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v11b_dic_2022, hash_key), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "hash_key"))%>% 
  dplyr::mutate(fech_ap_top_num= as.numeric(as.Date(str_sub(as.character(lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T)),1,10))))%>% #No parse failures
  dplyr::select(HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, dateofbirth_imp, Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro) %>% 
  dplyr::filter(!is.na(HASH_KEY)) %>% 
  dplyr::mutate_at(vars("Hurto", "Robo", "Venta.Drogas", "Riña", "Otro"), ~ifelse(.=="S",1,0)) %>% 
  dplyr::mutate(Total.VIF= ifelse(Total.VIF>0,1,0))%>% 
  dplyr::mutate(tot_off_top = base::rowSums(dplyr::select(.,c(Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro)), na.rm = T)) %>% 
  dplyr::mutate(dateofbirth_imp_num= as.numeric(dateofbirth_imp),
                fech_ap_top= lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T),
                edad_a_ap_top_num= lubridate::time_length(lubridate::interval(dateofbirth_imp, fech_ap_top),unit="years"),
                edad_b_ap_top_num= (fech_ap_top_num-dateofbirth_imp_num)/365.25,
                edad_a_ap_top_num_lim= edad_a_ap_top_num-(1/12),
                edad_b_ap_top_num_lim= edad_b_ap_top_num-(1/12)) %>% 
  dplyr::select(-dateofbirth_imp, -dateofbirth_imp_num) %>% 
  dplyr::filter(!is.na(edad_a_ap_top_num)) %>% 
  dplyr::group_by(HASH_KEY, edad_a_ap_top_num) %>% 
  dplyr::slice(-1) %>% 
  dplyr::ungroup()

a_tab7_lab_aft_d25<-
        paste0("(p= ",format(length(unique(CONS_TOP_2022_anti$HASH_KEY)),big.mark=","),"; n= ",
           format(length(CONS_TOP_2022_anti$HASH_KEY), big.mark=","),")")


a_tab7_lab_aft_d3<-
        paste0("(p= ",format(nrow(dplyr::distinct(dplyr::anti_join(CONS_C1_df_dup_SEP_2020, CONS_C1_df_dup_SEP_2020_22_d, by=c("hash_key","dup")),hash_key)),big.mark=","),"; n= ",
           format(nrow(dplyr::anti_join(CONS_C1_df_dup_SEP_2020, CONS_C1_df_dup_SEP_2020_22_d, by=c("hash_key","dup"))), big.mark=","),")")

a_tab8_lab_aft_d <- paste0('Offenses previous\nto the first admission\n',"(p= ",format(length(unique(Base_fiscalia_v10b_dic_2022[,"hash_key"])),big.mark=","),"; RUCs= ",
           format(length(unique(Base_fiscalia_v10b_dic_2022[,"caseid"])), big.mark=","),";\nn= ",
           format(length(Base_fiscalia_v10b_dic_2022[,"caseid"]), big.mark=","),")")

a_tab9_lab_aft<- paste0('&#8226;Discard observations coded as victims rather than ofenders ',tab7_lab_aft_d1,'\\\\\\l&#8226;Discard observations depending on the values of the end of the proceedings among offenders ',tab7_lab_aft_d2,'\\\\\\l&#8226;Discard TOP applications with duplicated dates and user ',a_tab7_lab_aft_d25,'\\\\\\l&#8226;Get the first treatment of each user ',a_tab7_lab_aft_d3,'\\\\\\l')

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#//  ON x.hash_key == y.id AND   x.edad_al_egres_imp > y.age_offending_imp AND x.dup = 1"

library(DiagrammeR) 
plot_merge_flowchart_excercise<-
  grViz("digraph flowchart {
        fontname='Comic Sans MS'
        # node definitions with substituted label text
        node [shape = rectangle,fontsize = 9]        
        tab1 [label = '@@1']
        blank [label = '', width = 0.0001, height = 0.0001]
        tab2 [label = '@@2',fontsize = 7]
        tab3 [label = '@@3']
        
        tab4 [label = '@@4',fontsize = 8]
        blank2 [label = '', width = 0.0001, height = 0.0001]
        tab5 [label = '@@5',fontsize = 7]
        tab6 [label= '@@6']
        
        blank3 [label = '', width = 0.0001, height = 0.0001]
        blank4 [label = '', width = 0.0001, height = 0.0001]      
        blank5 [label = '', width = 0.0001, height = 0.0001] 
        
        blank6 [label = '', width = 0.0001, height = 0.0001]
        tab7 [label = '@@7',fontsize = 7]
        tab8 [label= '@@8']
        
        # edge definitions with the node IDs
        rankdir='TB'; rank= same; tab1 -> blank [arrowhead = none,label='        Data wrangling and normalization process',fontsize = 8];
        rankdir='TB'; rank= same; tab1; tab3;   
        blank -> tab2;
                    subgraph {
                rank = same; tab2; blank;
                    }
        rankdir='TB'; rank= same; blank -> tab3;      
              
        tab4 -> blank2 [arrowhead = none,label='        Data wrangling and normalization process',fontsize = 8];
        blank2 -> tab5
        blank2 -> tab6
        blank4 -> blank6 [arrowhead= none, label='  &#10198;']
        blank6 -> tab7 
                    subgraph {
          rank = same; blank6; tab7;
                    }
        # bring_db1_a                    
        blank6 -> tab8 [label= '        Merge records where discharge age is greater than age of offenses', fontsize = 8] 
                    subgraph {
          rank = same; tab5; blank2;
                    }
                    subgraph {
        rank= same; tab3 -> blank3 -> blank4 -> blank5 -> tab6 [arrowhead= none]
                    }
        }
                    subgraph {
          rank = same; tab3; tab6;
                    }
                    subgraph {
          rank = same; tab1; tab4;
                    }         
                    subgraph {
          rank = same; tab2; tab5;
                    } 
                    subgraph {
          rank = same; tab1; tab3;
          rankdir=TB
          rank=same
                    }
                    subgraph {
          rank = same; tab4; tab6;
          rankdir=TB
          rank=same
                    }                     
  
        [1]:  a_tab1_lab_aft_d
        [2]:  a_tab2_lab_aft_d
        [3]:  a_tab3_lab_aft_d
        [4]:  a_tab4_lab_aft_d
        [5]:  a_tab5_lab_aft_d
        [6]:  a_tab6_lab_aft_d
        [7]:  a_tab9_lab_aft
        [8]:  a_tab8_lab_aft_d
        ", width = 1200,
          height = 900)

DPI = 1200
WidthCM = 11
HeightCM = 8

plot_merge_flowchart_excercise
Show code
library(DiagrammeRsvg)

plot_merge_flowchart_excercise %>%
  export_svg %>% charToRaw %>% rsvg::rsvg_pdf("./_figs/_flowchart_comm_DAD.pdf")
plot_merge_flowchart_excercise %>% DiagrammeRsvg::export_svg()%>%charToRaw %>% rsvg::rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("./_figs/_flowchart_comm_DAD.png")

htmlwidgets::saveWidget(plot_merge_flowchart_excercise, "./_figs/_flowchart_comm_DAD.html")
webshot::webshot("./_figs/_flowchart_comm_DAD.html", "./_figs/_flowchart_comm_DAD.png",vwidth = 1200, vheight = 900, zoom = 3)
Show code
webshot::webshot("./_figs/_flowchart_comm_DAD.html", "./_figs/_flowchart_comm_DAD_alt.pdf")

Time for this code chunk to run: 0 minutes

Show code
a_tab10_lab_aft_d<- paste0("TOP Dataset \n(p= ", dplyr::distinct(CONS_TOP, HASH_KEY)%>% nrow()%>% formatC(big.mark = ","),";\nn= ",formatC(nrow(CONS_TOP),big.mark = ","),")")

#bring_db3
a_tab11_lab_aft_d1<- 
    paste0( '(p= ',format(nrow(dplyr::distinct(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup(),id)), big.mark=","), '; RUCs= ', format(nrow(dplyr::distinct(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup(),caseid)), big.mark=","), '; n= ', format(nrow(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup()), big.mark=",") , ')'
    )
message(
paste0("Patients in PO records with unique combination of ID, RUC, end type and date of comission of the offense and offense ", a_tab11_lab_aft_d1)
)

a_tab11_lab_aft_d<- paste0('&#8226;Count events such as shoplifting, theft, domestic violence, drug selling, fights, or related behavior in the last four weeks','\\\\\\l',
                           '&#8226;Get PO data with more than one record with the same case ID, sentence, date of offense and type of crime ', a_tab11_lab_aft_d1,'\\\\\\l')

a_tab12_lab_aft_d<- 
    paste0('Merged database\n(p= ',format(nrow(dplyr::distinct(Base_fiscalia_v13c_dic_2022_3,HASH_KEY)), big.mark=","), '; n= ', format(nrow(Base_fiscalia_v13c_dic_2022_3), big.mark=",") , ')'
    )


plot_merge_flowchart_excercise2<-
  grViz("digraph flowchart {
        fontname='Comic Sans MS'
        # edge definitions with the node IDs
        # node definitions with substituted label text
        node [shape = rectangle, fontsize = 9]        
        tab1 [label = '@@1', fontsize = 7]
        tab2 [label = '@@2', fontsize = 7]
        blank [label = '', width = 0.0001, height = 0.0001]
        blanka [label = '', width = 0.0001, height = 0.0001]
        blankb [label = '', width = 0.0001, height = 0.0001]
        
        # bring_db2       
          subgraph {
            rank = same; tab1 -> blanka -> blank -> blankb -> tab2 [arrowhead= none]
          }
        tab3 [label = '@@3',fontsize = 6]
        blank2 [label = '', width = 0.0001, height = 0.0001]
        blank -> blank2 [arrowhead= none, label='  &#10198;']
        subgraph {
            rank = same; blank2 -> tab3
          }
        tab4 [label = '@@4',fontsize = 9]
        blank2 -> tab4 [label='        Select records of offenses committed before the application of TOP but\n        at least the last month previous to the application of TOP \\\\\\l',fontsize = 7];
  }

        [1]:  a_tab8_lab_aft_d
        [2]:  a_tab10_lab_aft_d
        [3]:  a_tab11_lab_aft_d
        [4]:  a_tab12_lab_aft_d
        [5]:  ''
        [6]:  ''
        [7]:  ''
        [8]:  ''
        ", width = 1200,
          height = 900)

plot_merge_flowchart_excercise2
Show code
plot_merge_flowchart_excercise2 %>%
  export_svg %>% charToRaw %>% rsvg::rsvg_pdf("./_figs/_flowchart2_comm_DAD.pdf")
plot_merge_flowchart_excercise2 %>% DiagrammeRsvg::export_svg()%>%charToRaw %>% rsvg::rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("./_figs/_flowchart2_comm_DAD.png")

htmlwidgets::saveWidget(plot_merge_flowchart_excercise2, "./_figs/_flowchart2_comm_DAD.html")
webshot::webshot("./_figs/_flowchart2_comm_DAD.html", "./_figs/_flowchart2_comm_DAD.png",vwidth = 1200, vheight = 900, zoom = 3)
Show code
webshot::webshot("./_figs/_flowchart2_comm_DAD.html", "./_figs/_flowchart2_comm_DAD_alt.pdf")

Time for this code chunk to run: 0 minutes


Session Info

Show code
message(Sys.getenv("R_LIBS_USER"))

C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)/renv/library/R-4.1/x86_64-w64-mingw32;C:/Users/CISS Fondecyt/AppData/Local/Temp/RtmpgD0vKP/renv-system-library

Show code
[1] "2023-02-20"
Show code
message(paste0("Editor context: ", path))

Editor context: C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)

Show code
if (grepl("CISS Fondecyt",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)/14_alt.RData")
  } else if (grepl("andre",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("C:/Users/andre/Desktop/SUD_CL/14_alt.RData")
  } else if (grepl("E:",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("E:/Mi unidad/Alvacast/SISTRAT 2022 (github)/14_alt.RData")
  } else {
    save.image(paste0(sub("2019","2022",sub("SUD_CL","",path)),"14_alt.RData"))
  }

Error in readRDS(responseFile): error al leer desde conexión

Show code
#load(paste0(dirname(rstudioapi::getSourceEditorContext()$path),"/14_alt.RData"))

sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))

Time for this code chunk to run: 0.2 minutes

Show code
#put name of the file
file<- "fiscalia_ags_jan_2023_match_SENDA.dta"

export_lab_stata_merge<-
  tibble::rownames_to_column(data.frame(Hmisc::label(dplyr::select(Base_fiscalia_v13,  all_of(c("hash_key", cont_vars_desc, cat_vars_desc, cat_vars_desc_off, vars_desc, "fech_ing_num_1", "fech_egres_imp", "cut_com_del", "cut_fec_nac","offender_d", "comuna_residencia_cod_rec", "porc_pobr", "Clasificación"))))))%>% data.frame() %>%
  dplyr::rename("code" = !!names(.[1]), "label" = !!names(.[2]))%>% data.frame()%>%
  dplyr::mutate(first= "cap noi label variable")%>%
  dplyr::mutate(final= paste0(first, " ",code,' "',label,'"'))%>%
  dplyr::select(-code,-label,-first)%>%
  dplyr::rename("*clear all"="final") %>%
  rbind(paste0('cap noi save "', gsub('/', '\\', path, fixed=T),'\\',file,'", replace'))%>%
  rbind(paste0('cap noi save "', gsub('/', '\\', path, fixed=T),'\\',file,'", replace'))

rbind(paste0('cap noi use "', gsub('/', '\\', path, fixed=T),'\\',file,'", clear'),export_lab_stata_merge) %>% knitr::kable("markdown")

write.table(rbind(paste0('cap noi use "', gsub('/', '\\', path, fixed=T),'\\',file,'", clear'),export_lab_stata_merge), file = paste0(path,"/_label_var_to_stata_jan2023.do"), sep = "",row.names = FALSE, quote = FALSE, fileEncoding="UTF-8")

Time for this code chunk to run: 0 minutes


Show code
*should be in the same folder of the .Rmd to work
cap noi do _label_var_to_stata_jan2023
Error in running command st

Time for this code chunk to run: 0 minutes